diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index ee8fa156b11795..0cef392e167f95 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -177,10 +177,11 @@ function(ov_download_tbbbind_2_5) if(WIN32 AND X86_64) RESOLVE_DEPENDENCY(TBBBIND_2_5 - ARCHIVE_WIN "tbbbind_2_5_static_win_v1.zip" + ARCHIVE_WIN "tbbbind_2_5_static_win_v2.zip" TARGET_PATH "${TEMP}/tbbbind_2_5" ENVIRONMENT "TBBBIND_2_5_ROOT" - SHA256 "a67afeea8cf194f97968c800dab5b5459972908295242e282045d6b8953573c1") + SHA256 "49ae93b13a13953842ff9ae8d01681b269b5b0bc205daf18619ea9a828c44bee" + USE_NEW_LOCATION TRUE) elseif(LINUX AND X86_64) RESOLVE_DEPENDENCY(TBBBIND_2_5 ARCHIVE_LIN "tbbbind_2_5_static_lin_v2.tgz" diff --git a/cmake/developer_package/plugins/create_plugins_hpp.cmake b/cmake/developer_package/plugins/create_plugins_hpp.cmake index cddcad738470ca..1fedf858ce58ca 100644 --- a/cmake/developer_package/plugins/create_plugins_hpp.cmake +++ b/cmake/developer_package/plugins/create_plugins_hpp.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var IE_DEVICE_MAPPING IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) +foreach(var OV_DEVICE_MAPPING BUILD_SHARED_LIBS OV_PLUGINS_HPP_HEADER OV_PLUGINS_HPP_HEADER_IN) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is required, but not defined") endif() @@ -10,29 +10,15 @@ endforeach() # configure variables -set(IE_PLUGINS_DECLARATIONS "") -set(IE_PLUGINS_MAP_DEFINITION +set(OV_PLUGINS_DECLARATIONS "") +set(OV_PLUGINS_MAP_DEFINITION " static const std::map plugins_hpp = {") -foreach(dev_map IN LISTS IE_DEVICE_MAPPING) +foreach(dev_map IN LISTS OV_DEVICE_MAPPING) string(REPLACE ":" ";" dev_map "${dev_map}") list(GET dev_map 0 mapped_dev_name) list(GET dev_map 1 actual_dev_name) - # common - set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") - set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") - - # declarations - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") - if(${actual_dev_name}_AS_EXTENSION) - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") - else() - set(_IE_CREATE_EXTENSION_FUNC "nullptr") - endif() - # definitions set(dev_config "{") if(${mapped_dev_name}_CONFIG) @@ -48,11 +34,31 @@ IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") endif() set(dev_config "${dev_config}}") - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} - { \"${mapped_dev_name}\", Value { ${_IE_CREATE_PLUGIN_FUNC}, ${_IE_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + + if(NOT BUILD_SHARED_LIBS) + # common + set(_OV_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") + set(_OV_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") + + # declarations + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_PLUGIN_FUNC});") + if(${actual_dev_name}_AS_EXTENSION) + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_EXTENSION_FUNC});") + else() + set(_OV_CREATE_EXTENSION_FUNC "nullptr") + endif() + + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} + { \"${mapped_dev_name}\", Value { ${_OV_CREATE_PLUGIN_FUNC}, ${_OV_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + else() + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} + { 
\"${mapped_dev_name}\", Value { \"${actual_dev_name}\", ${dev_config} } },") + endif() endforeach() -set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} +set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} };\n") -configure_file("${IE_PLUGINS_HPP_HEADER_IN}" "${IE_PLUGINS_HPP_HEADER}" @ONLY) +configure_file("${OV_PLUGINS_HPP_HEADER_IN}" "${OV_PLUGINS_HPP_HEADER}" @ONLY) diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index b4cfe20bd024e5..0d8db5561e5ada 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -113,7 +113,7 @@ function(ie_add_plugin) if(IE_PLUGIN_PSEUDO_DEVICE) set(plugin_hidden HIDDEN) endif() - ie_cpack_add_component(${install_component} + ie_cpack_add_component(${install_component} DISPLAY_NAME "${IE_PLUGIN_DEVICE_NAME} runtime" DESCRIPTION "${IE_PLUGIN_DEVICE_NAME} runtime" ${plugin_hidden} @@ -227,16 +227,18 @@ macro(ie_register_plugins_dynamic) # Combine all .xml files into plugins.xml - add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD - COMMAND - "${CMAKE_COMMAND}" - -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" - -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" - -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" - -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" - COMMENT - "Registering plugins to plugins.xml config file" - VERBATIM) + if(ENABLE_PLUGINS_XML) + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" + -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" + -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" + -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" + COMMENT + "Registering plugins to plugins.xml config file" + VERBATIM) + endif() endmacro() # @@ -279,13 +281,9 @@ function(ie_target_link_plugins TARGET_NAME) endfunction() # -# ie_generate_plugins_hpp() +# ov_generate_plugins_hpp() # -function(ie_generate_plugins_hpp) - if(BUILD_SHARED_LIBS) - return() - endif() - +function(ov_generate_plugins_hpp) set(device_mapping) set(device_configs) set(as_extension) @@ -296,17 +294,23 @@ function(ie_generate_plugins_hpp) message(FATAL_ERROR "Unexpected error, please, contact developer of this script") endif() - # create device mapping: preudo device => actual device + # create device mapping: pseudo device => actual device list(GET name 0 device_name) - if(${device_name}_PSEUDO_PLUGIN_FOR) - list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + if(BUILD_SHARED_LIBS) + list(GET name 1 library_name) + ie_plugin_get_file_name(${library_name} library_name) + list(APPEND device_mapping "${device_name}:${library_name}") else() - list(APPEND device_mapping "${device_name}:${device_name}") - endif() + if(${device_name}_PSEUDO_PLUGIN_FOR) + list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + else() + list(APPEND device_mapping "${device_name}:${device_name}") + endif() - # register plugin as extension - if(${device_name}_AS_EXTENSION) - list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + # register plugin as extension + if(${device_name}_AS_EXTENSION) + list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + endif() endif() # add default plugin config options @@ -317,21 +321,22 @@ function(ie_generate_plugins_hpp) endif() endforeach() - # add plugins to libraries including ie_plugins.hpp + # add plugins to 
libraries including ov_plugins.hpp ie_target_link_plugins(openvino) if(TARGET inference_engine_s) ie_target_link_plugins(inference_engine_s) endif() - set(ie_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ie_plugins.hpp") + set(ov_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ov_plugins.hpp") set(plugins_hpp_in "${IEDevScripts_DIR}/plugins/plugins.hpp.in") - add_custom_command(OUTPUT "${ie_plugins_hpp}" + add_custom_command(OUTPUT "${ov_plugins_hpp}" COMMAND "${CMAKE_COMMAND}" - -D "IE_DEVICE_MAPPING=${device_mapping}" - -D "IE_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" - -D "IE_PLUGINS_HPP_HEADER=${ie_plugins_hpp}" + -D "BUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}" + -D "OV_DEVICE_MAPPING=${device_mapping}" + -D "OV_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" + -D "OV_PLUGINS_HPP_HEADER=${ov_plugins_hpp}" ${device_configs} ${as_extension} -P "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" @@ -339,13 +344,13 @@ function(ie_generate_plugins_hpp) "${plugins_hpp_in}" "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" COMMENT - "Generate ie_plugins.hpp for static build" + "Generate ov_plugins.hpp for build" VERBATIM) # for some reason dependency on source files does not work # so, we have to use explicit target and make it dependency for inference_engine - add_custom_target(_ie_plugins_hpp DEPENDS ${ie_plugins_hpp}) - add_dependencies(inference_engine_obj _ie_plugins_hpp) + add_custom_target(_ov_plugins_hpp DEPENDS ${ov_plugins_hpp}) + add_dependencies(inference_engine_obj _ov_plugins_hpp) # add dependency for object files get_target_property(sources inference_engine_obj SOURCES) @@ -362,5 +367,5 @@ function(ie_generate_plugins_hpp) endforeach() # add dependency on header file generation for all inference_engine source files - set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ie_plugins_hpp}) + set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ov_plugins_hpp}) endfunction() diff --git a/cmake/developer_package/plugins/plugins.hpp.in b/cmake/developer_package/plugins/plugins.hpp.in index fa8119756b82e8..224f77c8cb980b 100644 --- a/cmake/developer_package/plugins/plugins.hpp.in +++ b/cmake/developer_package/plugins/plugins.hpp.in @@ -4,9 +4,14 @@ #pragma once +#include +#include + +#ifdef OPENVINO_STATIC_LIBRARY + #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" -@IE_PLUGINS_DECLARATIONS@ +@OV_PLUGINS_DECLARATIONS@ struct Value { InferenceEngine::CreatePluginEngineFunc * m_create_plugin_func; @@ -14,10 +19,20 @@ struct Value { std::map m_default_config; }; +#else + +struct Value { + std::string m_plugin_path; + std::map m_default_config; +}; + +#endif + using Key = std::string; using PluginsStaticRegistry = std::map; -inline const std::map getStaticPluginsRegistry() { -@IE_PLUGINS_MAP_DEFINITION@ + +inline const std::map getCompiledPluginsRegistry() { +@OV_PLUGINS_MAP_DEFINITION@ return plugins_hpp; } diff --git a/cmake/extra_modules.cmake b/cmake/extra_modules.cmake index 7b843341c11159..a5b1cd22e82ccf 100644 --- a/cmake/extra_modules.cmake +++ b/cmake/extra_modules.cmake @@ -169,9 +169,9 @@ ov_generate_dev_package_config() # with all imported developer targets register_extra_modules() -# for static libraries case we need to generate final ie_plugins.hpp +# for static libraries case we need to generate final ov_plugins.hpp # with all the information about plugins -ie_generate_plugins_hpp() +ov_generate_plugins_hpp() # used for static build ov_generate_frontends_hpp() diff --git a/cmake/features.cmake b/cmake/features.cmake index 
17c5ccc1b3c7e5..24dfaef46e89a7 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -94,6 +94,8 @@ ie_option (ENABLE_HETERO "Enables Hetero Device Plugin" ON) ie_option (ENABLE_TEMPLATE "Enable template plugin" ON) +ie_dependent_option (ENABLE_PLUGINS_XML "Generate plugins.xml configuration file or not" OFF "NOT BUILD_SHARED_LIBS" OFF) + ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_TESTS;ENABLE_GAPI_PREPROCESSING" OFF) ie_dependent_option (ENABLE_DATA "fetch models from testdata repo" ON "ENABLE_FUNCTIONAL_TESTS;NOT ANDROID" OFF) diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index 24238be0604c1b..d530ea36d1d9c8 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -28,6 +28,9 @@ foreach(option IN LISTS ov_options) endforeach() message(" ") +# activate generation of plugins.xml +set(ENABLE_PLUGINS_XML ON) + # for samples in 3rd party projects if(ENABLE_SAMPLES) set_and_check(gflags_DIR "@gflags_BINARY_DIR@") diff --git a/docs/Documentation/deployment_guide_introduction.md b/docs/Documentation/deployment_guide_introduction.md index 4e3162d6de5383..6496a3cf494250 100644 --- a/docs/Documentation/deployment_guide_introduction.md +++ b/docs/Documentation/deployment_guide_introduction.md @@ -9,12 +9,9 @@ Run and Deploy Locally Deploy via Model Serving -@endsphinxdirective - Once you have a model that meets both OpenVINO™ and your requirements, you can choose how to deploy it with your application. -@sphinxdirective .. panels:: :doc:`Deploy via OpenVINO Runtime ` @@ -30,8 +27,7 @@ Once you have a model that meets both OpenVINO™ and your requirements, you can Deployment via OpenVINO Model Server allows the application to connect to the inference server set up remotely. This way inference can use external resources instead of those available to the application itself. -@endsphinxdirective - +Apart from the default deployment options, you may also :doc:`deploy your application for the TensorFlow framework with OpenVINO Integration ` -Apart from the default deployment options, you may also [deploy your application for the TensorFlow framework with OpenVINO Integration](./openvino_ecosystem_ovtf.md). +@endsphinxdirective \ No newline at end of file diff --git a/docs/Documentation/inference_modes_overview.md b/docs/Documentation/inference_modes_overview.md index 970820236d7417..7372a047466098 100644 --- a/docs/Documentation/inference_modes_overview.md +++ b/docs/Documentation/inference_modes_overview.md @@ -10,15 +10,15 @@ openvino_docs_OV_UG_Running_on_multiple_devices openvino_docs_OV_UG_Hetero_execution openvino_docs_OV_UG_Automatic_Batching - -@endsphinxdirective -OpenVINO Runtime offers multiple inference modes to allow optimum hardware utilization under different conditions. The most basic one is a single-device mode, which defines just one device responsible for the entire inference workload. It supports a range of Intel hardware by means of plugins embedded in the Runtime library, each set up to offer the best possible performance. For a complete list of supported devices and instructions on how to use them, refer to the [guide on inference devices](../OV_Runtime_UG/supported_plugins/Device_Plugins.md). + +OpenVINO Runtime offers multiple inference modes to allow optimum hardware utilization under different conditions. 
The most basic one is a single-device mode, which defines just one device responsible for the entire inference workload. It supports a range of Intel hardware by means of plugins embedded in the Runtime library, each set up to offer the best possible performance. For a complete list of supported devices and instructions on how to use them, refer to the :doc:`guide on inference devices `. The remaining modes assume certain levels of automation in selecting devices for inference. Using them in the deployed solution may potentially increase its performance and portability. The automated modes are: -* [Automatic Device Selection (AUTO)](../OV_Runtime_UG/auto_device_selection.md) -* [Multi-Device Execution (MULTI)](../OV_Runtime_UG/multi_device.md) -* [Heterogeneous Execution (HETERO)](../OV_Runtime_UG/hetero_execution.md) -* [Automatic Batching Execution (Auto-batching)](../OV_Runtime_UG/automatic_batching.md) +* :doc:`Automatic Device Selection (AUTO) ` +* :doc:``Multi-Device Execution (MULTI) ` +* :doc:`Heterogeneous Execution (HETERO) ` +* :doc:`Automatic Batching Execution (Auto-batching) ` +@endsphinxdirective diff --git a/docs/IE_PLUGIN_DG/AsyncInferRequest.md b/docs/IE_PLUGIN_DG/AsyncInferRequest.md index c42df76d6fff27..e45a5799deea8b 100644 --- a/docs/IE_PLUGIN_DG/AsyncInferRequest.md +++ b/docs/IE_PLUGIN_DG/AsyncInferRequest.md @@ -1,49 +1,45 @@ -# Asynchronous Inference Request {#openvino_docs_ie_plugin_dg_async_infer_request} +# Asynchronous Inference Request {#openvino_docs_ov_plugin_dg_async_infer_request} Asynchronous Inference Request runs an inference pipeline asynchronously in one or several task executors depending on a device pipeline structure. -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class: -- The class has the `_pipeline` field of `std::vector >`, which contains pairs of an executor and executed task. +- The class has the `m_pipeline` field of `std::vector, ov::threading::Task> >`, which contains pairs of an executor and executed task. - All executors are passed as arguments to a class constructor and they are in the running state and ready to run tasks. -- The class has the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method, which waits for `_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the executable network instance and are not destroyed. +- The class has the ov::IAsyncInferRequest::stop_and_wait method, which waits for `m_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the compiled model instance and are not destroyed. -`AsyncInferRequest` Class +AsyncInferRequest Class ------------------------ -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class for a custom asynchronous inference request implementation: @snippet src/async_infer_request.hpp async_infer_request:header -#### Class Fields +### Class Fields -- `_inferRequest` - a reference to the [synchronous inference request](@ref openvino_docs_ie_plugin_dg_infer_request) implementation. Its methods are reused in the `AsyncInferRequest` constructor to define a device pipeline. 
-- `_waitExecutor` - a task executor that waits for a response from a device about device tasks completion +- `m_wait_executor` - a task executor that waits for a response from a device about device tasks completion -> **NOTE**: If a plugin can work with several instances of a device, `_waitExecutor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. +> **NOTE**: If a plugin can work with several instances of a device, `m_wait_executor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. -### `AsyncInferRequest()` +### AsyncInferRequest() -The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `_pipeline`. The example below demonstrates `_pipeline` creation with the following stages: +The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `m_pipeline`. The example below demonstrates `m_pipeline` creation with the following stages: -- `inferPreprocess` is a CPU compute task. -- `startPipeline` is a CPU ligthweight task to submit tasks to a remote device. -- `waitPipeline` is a CPU non-compute task that waits for a response from a remote device. -- `inferPostprocess` is a CPU compute task. +- `infer_preprocess_and_start_pipeline` is a CPU lightweight task to submit tasks to a remote device. +- `wait_pipeline` is a CPU non-compute task that waits for a response from a remote device. +- `infer_postprocess` is a CPU compute task. @snippet src/async_infer_request.cpp async_infer_request:ctor The stages are distributed among two task executors in the following way: -- `inferPreprocess` and `startPipeline` are combined into a single task and run on `_requestExecutor`, which computes CPU tasks. +- `infer_preprocess_and_start_pipeline` prepares input tensors, starts the pipeline, and runs on `m_request_executor`, which computes CPU tasks. - You need at least two executors to overlap compute tasks of a CPU and a remote device the plugin works with. Otherwise, CPU and device tasks are executed serially one by one. -- `waitPipeline` is sent to `_waitExecutor`, which works with the device. +- `wait_pipeline` is sent to `m_wait_executor`, which works with the device. -> **NOTE**: `callbackExecutor` is also passed to the constructor and it is used in the base InferenceEngine::AsyncInferRequestThreadSafeDefault class, which adds a pair of `callbackExecutor` and a callback function set by the user to the end of the pipeline. +> **NOTE**: `m_callback_executor` is also passed to the constructor and it is used in the base ov::IAsyncInferRequest class, which adds a pair of `callback_executor` and a callback function set by the user to the end of the pipeline. -Inference request stages are also profiled using IE_PROFILING_AUTO_SCOPE, which shows how pipelines of multiple asynchronous inference requests are run in parallel via the [Intel® VTune™ Profiler](https://software.intel.com/en-us/vtune) tool. +### ~AsyncInferRequest() -### `~AsyncInferRequest()` - -In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method of the base class. +In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the ov::IAsyncInferRequest::stop_and_wait method of the base class. 
@snippet src/async_infer_request.cpp async_infer_request:dtor diff --git a/docs/IE_PLUGIN_DG/Building.md b/docs/IE_PLUGIN_DG/Building.md index ebd594291fd278..f40f2296e85122 100644 --- a/docs/IE_PLUGIN_DG/Building.md +++ b/docs/IE_PLUGIN_DG/Building.md @@ -1,4 +1,4 @@ -# Build Plugin Using CMake {#openvino_docs_ie_plugin_dg_plugin_build} +# Build Plugin Using CMake {#openvino_docs_ov_plugin_dg_plugin_build} OpenVINO build infrastructure provides the OpenVINO Developer Package for plugin development. @@ -30,6 +30,7 @@ Once the commands above are executed, the OpenVINO Developer Package is generate * `openvino::unitTestUtils` - static library with unit tests utilities * `openvino::ngraphFunctions` - static library with the set of `ov::Model` builders * `openvino::funcSharedTests` - static library with common functional tests + * `openvino::ngraph_reference` - static library with operation reference implementations. > **NOTE**: it's enough just to run `cmake --build . --target ov_dev_targets` command to build only targets from the > OpenVINO Developer package. @@ -61,7 +62,7 @@ $ cmake -DENABLE_FUNCTIONAL_TESTS=OFF -DOpenVINODeveloperPackage_DIR=../openvino - `src/CMakeLists.txt` to build a plugin shared library from sources: @snippet template/src/CMakeLists.txt cmake:plugin - > **NOTE**: `openvino::runtime` target is imported from the OpenVINO Developer Package. + > **NOTE**: `openvino::...` targets are imported from the OpenVINO Developer Package. - `tests/functional/CMakeLists.txt` to build a set of functional plugin tests: @snippet template/tests/functional/CMakeLists.txt cmake:functional_tests diff --git a/docs/IE_PLUGIN_DG/CompiledModel.md b/docs/IE_PLUGIN_DG/CompiledModel.md new file mode 100644 index 00000000000000..fa80ee81cb75ab --- /dev/null +++ b/docs/IE_PLUGIN_DG/CompiledModel.md @@ -0,0 +1,89 @@ +# Compiled Model {#openvino_docs_ov_plugin_dg_compiled_model} + +ov::CompiledModel class functionality: +- Compile an ov::Model instance to a backend specific graph representation +- Create an arbitrary number of ov::InferRequest objects +- Hold some common resources shared between different instances of ov::InferRequest. For example: + - ov::ICompiledModel::m_task_executor task executor to implement asynchronous execution + - ov::ICompiledModel::m_callback_executor task executor to run an asynchronous inference request callback in a separate thread + +CompiledModel Class +------------------------ + +OpenVINO Plugin API provides the interface ov::ICompiledModel which should be used as a base class for a compiled model. Based on that, a declaration of an compiled model class can look as follows: + +@snippet src/compiled_model.hpp compiled_model:header + +### Class Fields + +The example class has several fields: + +- `m_request_id` - Tracks a number of created inference requests, which is used to distinguish different inference requests during profiling via the Intel® Instrumentation and Tracing Technology (ITT) library. +- `m_cfg` - Defines a configuration a compiled model was compiled with. +- `m_model` - Keeps a reference to transformed `ov::Model` which is used in OpenVINO reference backend computations. Note, in case of other backends with backend specific graph representation `m_model` has different type and represents backend specific graph or just a set of computational kernels to perform an inference. +- `m_loaded_from_cache` - Allows to understand that model was loaded from cache. 
+ +### CompiledModel Constructor + +This constructor accepts a generic representation of a model as an ov::Model, which is compiled into a backend specific device graph: + +@snippet src/compiled_model.cpp compiled_model:ctor + +The implementation `compile_model()` is fully device-specific. + +### compile_model() + +The function accepts a const shared pointer to an `ov::Model` object and applies OpenVINO passes using the `transform_model()` function, which defines the plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precision Transformations in the [Low Precision Transformations](@ref openvino_docs_OV_UG_lpt) guide. + +@snippet src/compiled_model.cpp compiled_model:compile_model + +> **NOTE**: After all these steps, the backend specific graph is ready to create inference requests and perform inference. + +### export_model() + +The implementation of the method should write all data to the `model_stream`, which is required to import a backend specific graph later in the `Plugin::import_model` method: + +@snippet src/compiled_model.cpp compiled_model:export_model + +### create_sync_infer_request() + +The method creates a synchronous inference request and returns it. + +@snippet src/compiled_model.cpp compiled_model:create_sync_infer_request + +While the public OpenVINO API has a single interface for inference request, which can be executed in synchronous and asynchronous modes, a plugin library implementation has two separate classes: + +- [Synchronous inference request](@ref openvino_docs_ov_plugin_dg_infer_request), which defines pipeline stages and runs them synchronously in the `infer` method. +- [Asynchronous inference request](@ref openvino_docs_ov_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can have one or several stages: + - For single-stage pipelines, there is no need to define this method and create a class derived from ov::IAsyncInferRequest. For single stage pipelines, a default implementation of this method creates ov::IAsyncInferRequest wrapping a synchronous inference request and runs it asynchronously in the `m_request_executor` executor. + - For pipelines with multiple stages, such as performing some preprocessing on host, uploading input data to a device, running inference on a device, or downloading and postprocessing output data, schedule stages on several task executors to achieve better device use and performance. You can do it by creating a sufficient number of inference requests running in parallel. In this case, device stages of different inference requests are overlapped with the preprocessing and postprocessing stages, giving better performance. + > **IMPORTANT**: It is up to you to decide how many task executors you need to optimally execute a device pipeline. + + +### create_infer_request() + +The method creates an asynchronous inference request and returns it. + +@snippet src/compiled_model.cpp compiled_model:create_infer_request + +### get_property() + +Returns a current value for a property with the name `name`. The method extracts configuration values a compiled model is compiled with. + +@snippet src/compiled_model.cpp compiled_model:get_property + +This function is the only way to get configuration values when a model is imported and compiled by other developers and tools. 
+ +### set_property() + +The methods allows to set compiled model specific properties. + +@snippet src/compiled_model.cpp compiled_model:set_property + +### get_runtime_model() + +The methods returns the runtime model with backend specific information. + +@snippet src/compiled_model.cpp compiled_model:get_runtime_model + +The next step in plugin library implementation is the [Synchronous Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class. diff --git a/docs/IE_PLUGIN_DG/ExecutableNetwork.md b/docs/IE_PLUGIN_DG/ExecutableNetwork.md deleted file mode 100644 index 4498888cb79eab..00000000000000 --- a/docs/IE_PLUGIN_DG/ExecutableNetwork.md +++ /dev/null @@ -1,90 +0,0 @@ -# Executable Network {#openvino_docs_ie_plugin_dg_executable_network} - -`ExecutableNetwork` class functionality: -- Compile an InferenceEngine::ICNNNetwork instance to a backend specific graph representation -- Create an arbitrary number of `InferRequest` objects -- Hold some common resources shared between different instances of `InferRequest`. For example: - - InferenceEngine::IExecutableNetworkInternal::_taskExecutor task executor to implement asynchronous execution - - InferenceEngine::IExecutableNetworkInternal::_callbackExecutor task executor to run an asynchronous inference request callback in a separate thread - -`ExecutableNetwork` Class ------------------------- - -Inference Engine Plugin API provides the helper InferenceEngine::ExecutableNetworkThreadSafeDefault class recommended to use as a base class for an executable network. Based on that, a declaration of an executable network class can look as follows: - -@snippet src/compiled_model.hpp executable_network:header - -#### Class Fields - -The example class has several fields: - -- `_requestId` - Tracks a number of created inference requests, which is used to distinguish different inference requests during profiling via the Intel® Instrumentation and Tracing Technology (ITT) library. -- `_cfg` - Defines a configuration an executable network was compiled with. -- `_plugin` - Refers to a plugin instance. -- `_function` - Keeps a reference to transformed `ngraph::Function` which is used in ngraph reference backend computations. Note, in case of other backends with backend specific graph representation `_function` has different type and represents backend specific graph or just a set of computational kernels to perform an inference. -- `_inputIndex` - maps a name of input with its index among all network inputs. -- `_outputIndex` - maps a name of output with its index among all network outputs. - -### `ExecutableNetwork` Constructor with `ICNNNetwork` - -This constructor accepts a generic representation of a neural network as an InferenceEngine::ICNNNetwork reference and is compiled into a backend specific device graph: - -@snippet src/compiled_model.cpp executable_network:ctor_cnnnetwork - -The implementation `CompileNetwork` is fully device-specific. - -### `CompileNetwork()` - -The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps: - -1. Applies nGraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precisions Transformations in [Low Precision Transformations](@ref openvino_docs_OV_UG_lpt) guide. -2. 
Maps the transformed graph to a backend specific graph representation (for example, to CPU plugin internal graph representation). -3. Allocates and fills memory for graph weights, backend specific memory handles and so on. - -@snippet src/compiled_model.cpp executable_network:map_graph - -> **NOTE**: After all these steps, the backend specific graph is ready to create inference requests and perform inference. - -### `ExecutableNetwork` Constructor Importing from Stream - -This constructor creates a backend specific graph by importing from a stream object: - -> **NOTE**: The export of backend specific graph is done in the `Export` method, and data formats must be the same for both import and export. - -### `Export()` - -The implementation of the method should write all data to the `model` stream, which is required to import a backend specific graph later in the `Plugin::Import` method: - -@snippet src/compiled_model.cpp executable_network:export - -### `CreateInferRequest()` - -The method creates an asynchronous inference request and returns it. While the public Inference Engine API has a single interface for inference request, which can be executed in synchronous and asynchronous modes, a plugin library implementation has two separate classes: - -- [Synchronous inference request](@ref openvino_docs_ie_plugin_dg_infer_request), which defines pipeline stages and runs them synchronously in the `Infer` method. -- [Asynchronous inference request](@ref openvino_docs_ie_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can has one or several stages: - - For single-stage pipelines, there is no need to define this method and create a class derived from InferenceEngine::AsyncInferRequestThreadSafeDefault. For single stage pipelines, a default implementation of this method creates InferenceEngine::AsyncInferRequestThreadSafeDefault wrapping a synchronous inference request and runs it asynchronously in the `_taskExecutor` executor. - - For pipelines with multiple stages, such as performing some preprocessing on host, uploading input data to a device, running inference on a device, or downloading and postprocessing output data, schedule stages on several task executors to achieve better device use and performance. You can do it by creating a sufficient number of inference requests running in parallel. In this case, device stages of different inference requests are overlapped with preprocessing and postprocessing stage giving better performance. - > **IMPORTANT**: It is up to you to decide how many task executors you need to optimally execute a device pipeline. - -@snippet src/compiled_model.cpp executable_network:create_infer_request - -### `GetMetric()` - -Returns a metric value for a metric with the name `name`. A metric is a static type of information about an executable network. Examples of metrics: - -- EXEC_NETWORK_METRIC_KEY(NETWORK_NAME) - name of an executable network -- EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) - heuristic to denote an optimal (or at least sub-optimal) number of inference requests needed to run asynchronously to use the current device fully -- Any other executable network metric specific for a particular device. 
Such metrics and possible values must be declared in a plugin configuration public header, for example, `template/config.hpp` - -The IE_SET_METRIC_RETURN helper macro sets metric value and checks that the actual metric type matches a type of the specified value. - -### `GetConfig()` - -Returns a current value for a configuration key with the name `name`. The method extracts configuration values an executable network is compiled with. - -@snippet src/compiled_model.cpp executable_network:get_config - -This function is the only way to get configuration values when a network is imported and compiled by other developers and tools (for example, the [Compile tool](@ref openvino_inference_engine_tools_compile_tool_README). - -The next step in plugin library implementation is the [Synchronous Inference Request](@ref openvino_docs_ie_plugin_dg_infer_request) class. diff --git a/docs/IE_PLUGIN_DG/InferRequest.md b/docs/IE_PLUGIN_DG/InferRequest.md index db03bb3b06d54b..b40f23221fe90f 100644 --- a/docs/IE_PLUGIN_DG/InferRequest.md +++ b/docs/IE_PLUGIN_DG/InferRequest.md @@ -1,83 +1,84 @@ -# Synchronous Inference Request {#openvino_docs_ie_plugin_dg_infer_request} +# Synchronous Inference Request {#openvino_docs_ov_plugin_dg_infer_request} `InferRequest` class functionality: -- Allocate input and output blobs needed for a backend-dependent network inference. -- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) implementation. +- Allocate input and output tensors needed for a backend-dependent network inference. +- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) implementation. - Call inference stages one by one synchronously. -`InferRequest` Class +InferRequest Class ------------------------ -Inference Engine Plugin API provides the helper InferenceEngine::IInferRequestInternal class recommended -to use as a base class for a synchronous inference request implementation. Based of that, a declaration +OpenVINO Plugin API provides the interface ov::ISyncInferRequest which should be +used as a base class for a synchronous inference request implementation. Based of that, a declaration of a synchronous request class can look as follows: @snippet src/sync_infer_request.hpp infer_request:header -#### Class Fields +### Class Fields The example class has several fields: -- `_executableNetwork` - reference to an executable network instance. From this reference, an inference request instance can take a task executor, use counter for a number of created inference requests, and so on. -- `_profilingTask` - array of the `std::array` type. Defines names for pipeline stages. Used to profile an inference pipeline execution with the Intel® instrumentation and tracing technology (ITT). -- `_durations` - array of durations of each pipeline stage. -- `_networkInputBlobs` - input blob map. -- `_networkOutputBlobs` - output blob map. -- `_parameters` - `ngraph::Function` parameter operations. -- `_results` - `ngraph::Function` result operations. +- `m_profiling_task` - array of the `std::array` type. Defines names for pipeline stages. 
Used to profile an inference pipeline execution with the Intel® instrumentation and tracing technology (ITT). +- `m_durations` - array of durations of each pipeline stage. - backend specific fields: - - `_inputTensors` - inputs tensors which wrap `_networkInputBlobs` blobs. They are used as inputs to backend `_executable` computational graph. - - `_outputTensors` - output tensors which wrap `_networkOutputBlobs` blobs. They are used as outputs from backend `_executable` computational graph. - - `_executable` - an executable object / backend computational graph. + - `m_backend_input_tensors` - input backend tensors. + - `m_backend_output_tensors` - output backend tensors. + - `m_executable` - an executable object / backend computational graph. -### `InferRequest` Constructor +### InferRequest Constructor -The constructor initializes helper fields and calls methods which allocate blobs: +The constructor initializes helper fields and calls methods which allocate tensors: @snippet src/sync_infer_request.cpp infer_request:ctor -> **NOTE**: Call InferenceEngine::CNNNetwork::getInputsInfo and InferenceEngine::CNNNetwork::getOutputsInfo to specify both layout and precision of blobs, which you can set with InferenceEngine::InferRequest::SetBlob and get with InferenceEngine::InferRequest::GetBlob. A plugin uses these hints to determine its internal layouts and precisions for input and output blobs if needed. +> **NOTE**: Use inputs/outputs information from the compiled model to understand shape and element type of tensors, which you can set with ov::InferRequest::set_tensor and get with ov::InferRequest::get_tensor. A plugin uses these hints to determine its internal layouts and element types for input and output tensors if needed. -### `~InferRequest` Destructor +### ~InferRequest Destructor -Decrements a number of created inference requests: +Destructor can contain plugin specific logic to finish and destroy infer request. @snippet src/sync_infer_request.cpp infer_request:dtor -### `InferImpl()` +### set_tensors_impl() -**Implementation details:** Base IInferRequestInternal class implements the public InferenceEngine::IInferRequestInternal::Infer method as following: -- Checks blobs set by users -- Calls the `InferImpl` method defined in a derived class to call actual pipeline stages synchronously +The method allows to set batched tensors in case if the plugin supports it. -@snippet src/sync_infer_request.cpp infer_request:infer_impl +@snippet src/sync_infer_request.cpp infer_request:set_tensors_impl -#### 1. `inferPreprocess` +### query_state() -Below is the code of the `inferPreprocess` method to demonstrate Inference Engine common preprocessing step handling: +The method returns variable states from the model. -@snippet src/sync_infer_request.cpp infer_request:infer_preprocess +@snippet src/sync_infer_request.cpp infer_request:query_state + +### infer() + +The method calls actual pipeline stages synchronously. Inside the method plugin should check input/output tensors, move external tensors to backend and run the inference. + +@snippet src/sync_infer_request.cpp infer_request:infer -**Details:** -* `InferImpl` must call the InferenceEngine::IInferRequestInternal::execDataPreprocessing function, which executes common Inference Engine preprocessing step (for example, applies resize or color conversion operations) if it is set by the user. The output dimensions, layout and precision matches the input information set via InferenceEngine::CNNNetwork::getInputsInfo. 
-* If `inputBlob` passed by user differs in terms of precisions from precision expected by plugin, `blobCopy` is performed which does actual precision conversion. +#### 1. infer_preprocess() + +Below is the code of the `infer_preprocess()` method. The method checks user input/output tensors and demonstrates conversion from user tensor to backend specific representation: + +@snippet src/sync_infer_request.cpp infer_request:infer_preprocess -#### 2. `startPipeline` +#### 2. start_pipeline() -Executes a pipeline synchronously using `_executable` object: +Executes a pipeline synchronously using `m_executable` object: @snippet src/sync_infer_request.cpp infer_request:start_pipeline -#### 3. `inferPostprocess` +#### 3. infer_postprocess() -Converts output blobs if precisions of backend output blobs and blobs passed by user are different: +Converts backend specific tensors to tensors passed by user: @snippet src/sync_infer_request.cpp infer_request:infer_postprocess -### `GetPerformanceCounts()` +### get_profiling_info() -The method sets performance counters which were measured during pipeline stages execution: +The method returns the profiling info which was measured during pipeline stages execution: -@snippet src/sync_infer_request.cpp infer_request:get_performance_counts +@snippet src/sync_infer_request.cpp infer_request:get_profiling_info -The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) class. +The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) class. diff --git a/docs/IE_PLUGIN_DG/Intro.md b/docs/IE_PLUGIN_DG/Intro.md index 88fff85de700ad..ed3d101ea4a6bc 100644 --- a/docs/IE_PLUGIN_DG/Intro.md +++ b/docs/IE_PLUGIN_DG/Intro.md @@ -7,12 +7,14 @@ :caption: Converting and Preparing Models :hidden: - Implement Plugin Functionality - Implement Executable Network Functionality - Implement Synchronous Inference Request - Implement Asynchronous Inference Request - openvino_docs_ie_plugin_dg_plugin_build - openvino_docs_ie_plugin_dg_plugin_testing + Implement Plugin Functionality + Implement Compiled Model Functionality + Implement Synchronous Inference Request + Implement Asynchronous Inference Request + Implement Remote Context + Implement Remote Tensor + openvino_docs_ov_plugin_dg_plugin_build + openvino_docs_ov_plugin_dg_plugin_testing openvino_docs_ie_plugin_detailed_guides openvino_docs_ie_plugin_api_references @@ -27,24 +29,26 @@ OpenVINO Plugin Library OpenVINO plugin dynamic library consists of several main components: -1. [Plugin class](@ref openvino_docs_ie_plugin_dg_plugin): - - Provides information about devices of a specific type. - - Can create an [compiled model](@ref openvino_docs_ie_plugin_dg_executable_network) instance which represents a Neural - Network backend specific graph structure for a particular device in opposite to the ov::Model - which is backend-independent. - - Can import an already compiled graph structure from an input stream to an - [compiled model](@ref openvino_docs_ie_plugin_dg_executable_network) object. -2. [Compiled Modek class](@ref openvino_docs_ie_plugin_dg_executable_network): - - Is an execution configuration compiled for a particular device and takes into account its capabilities. - - Holds a reference to a particular device and a task executor for this device. - - Can create several instances of [Inference Request](@ref openvino_docs_ie_plugin_dg_infer_request). 
- - Can export an internal backend specific graph structure to an output stream. -3. [Inference Request class](@ref openvino_docs_ie_plugin_dg_infer_request): +1. [Plugin class](@ref openvino_docs_ov_plugin_dg_plugin): + - Provides information about devices of a specific type. + - Can create an [compiled model](@ref openvino_docs_ov_plugin_dg_compiled_model) instance which represents a Neural Network backend specific graph structure for a particular device in opposite to the ov::Model + which is backend-independent. + - Can import an already compiled graph structure from an input stream to an + [compiled model](@ref openvino_docs_ov_plugin_dg_compiled_model) object. +2. [Compiled Model class](@ref openvino_docs_ov_plugin_dg_compiled_model): + - Is an execution configuration compiled for a particular device and takes into account its capabilities. + - Holds a reference to a particular device and a task executor for this device. + - Can create several instances of [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request). + - Can export an internal backend specific graph structure to an output stream. +3. [Inference Request class](@ref openvino_docs_ov_plugin_dg_infer_request): - Runs an inference pipeline serially. - Can extract performance counters for an inference pipeline execution profiling. -4. [Asynchronous Inference Request class](@ref openvino_docs_ie_plugin_dg_async_infer_request): - - Wraps the [Inference Request](@ref openvino_docs_ie_plugin_dg_infer_request) class and runs pipeline stages in parallel - on several task executors based on a device-specific pipeline structure. +4. [Asynchronous Inference Request class](@ref openvino_docs_ov_plugin_dg_async_infer_request): + - Wraps the [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class and runs pipeline stages in parallel on several task executors based on a device-specific pipeline structure. +5. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): + - Provides the device specific remote context. Context allows to create remote tensors. +6. [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) + - Provides the device specific remote tensor API and implementation. > **NOTE**: This documentation is written based on the `Template` plugin, which demonstrates plugin @@ -55,9 +59,9 @@ at `/src/plugins/template`. Detailed guides ----------------------- -* [Build](@ref openvino_docs_ie_plugin_dg_plugin_build) a plugin library using CMake -* Plugin and its components [testing](@ref openvino_docs_ie_plugin_dg_plugin_testing) -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Build](@ref openvino_docs_ov_plugin_dg_plugin_build) a plugin library using CMake +* Plugin and its components [testing](@ref openvino_docs_ov_plugin_dg_plugin_testing) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index e3b524fbae70ec..96326fabcb574a 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -1,4 +1,4 @@ -# Plugin {#openvino_docs_ie_plugin_dg_plugin} +# Plugin {#openvino_docs_ov_plugin_dg_plugin} OpenVINO Plugin usually represents a wrapper around a backend. Backends can be: - OpenCL-like backend (e.g. clDNN library) for GPU devices. 
@@ -8,7 +8,7 @@ OpenVINO Plugin usually represents a wrapper around a backend. Backends can be: The responsibility of OpenVINO Plugin: - Initializes a backend and throw exception in `Engine` constructor if backend cannot be initialized. - Provides information about devices enabled by a particular backend, e.g. how many devices, their properties and so on. -- Loads or imports [compiled model](@ref openvino_docs_ie_plugin_dg_executable_network) objects. +- Loads or imports [compiled model](@ref openvino_docs_ov_plugin_dg_compiled_model) objects. In addition to the OpenVINO Public API, the OpenVINO provides the Plugin API, which is a set of functions and helper classes that simplify new plugin development: @@ -16,7 +16,7 @@ In addition to the OpenVINO Public API, the OpenVINO provides the Plugin API, wh - implementations in the `src/inference/src/dev/` directory - symbols in the OpenVINO shared library -To build an OpenVINO plugin with the Plugin API, see the [OpenVINO Plugin Building](@ref openvino_docs_ie_plugin_dg_plugin_build) guide. +To build an OpenVINO plugin with the Plugin API, see the [OpenVINO Plugin Building](@ref openvino_docs_ov_plugin_dg_plugin_build) guide. Plugin Class ------------------------ @@ -39,7 +39,7 @@ The provided plugin class also has several fields: As an example, a plugin configuration has three value parameters: - `device_id` - particular device ID to work with. Applicable if a plugin supports more than one `Template` device. In this case, some plugin methods, like `set_property`, `query_model`, and `compile_model`, must support the ov::device::id property. -- `perf_counts` - boolean value to identify whether to collect performance counters during [Inference Request](@ref openvino_docs_ie_plugin_dg_infer_request) execution. +- `perf_counts` - boolean value to identify whether to collect performance counters during [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) execution. - `streams_executor_config` - configuration of `ov::threading::IStreamsExecutor` to handle settings of multi-threaded context. - `performance_mode` - configuration of `ov::hint::PerformanceMode` to set the performance mode. @@ -75,7 +75,7 @@ which holds a backend-dependent compiled model in an internal representation: Before a creation of an `CompiledModel` instance via a constructor, a plugin may check if a provided ov::Model object is supported by a device if it is needed. -Actual model compilation is done in the `CompiledModel` constructor. Refer to the [CompiledModel Implementation Guide](@ref openvino_docs_ie_plugin_dg_executable_network) for details. +Actual model compilation is done in the `CompiledModel` constructor. Refer to the [CompiledModel Implementation Guide](@ref openvino_docs_ov_plugin_dg_compiled_model) for details. > **NOTE**: Actual configuration map used in `CompiledModel` is constructed as a base plugin > configuration set via `Plugin::set_property`, where some values are overwritten with `config` passed to `Plugin::compile_model`. @@ -85,7 +85,7 @@ Actual model compilation is done in the `CompiledModel` constructor. Refer to th The function accepts a const shared pointer to `ov::Model` object and applies common and device-specific transformations on a copied model to make it more friendly to hardware operations. For details how to write custom device-specific transformation, please, refer to [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide. 
See detailed topics about model representation: * [Intermediate Representation and Operation Sets](@ref openvino_docs_MO_DG_IR_and_opsets) - * [Quantized models](@ref openvino_docs_ie_plugin_dg_quantized_networks). + * [Quantized models](@ref openvino_docs_ov_plugin_dg_quantized_models). @snippet template/src/plugin.cpp plugin:transform_model @@ -130,7 +130,7 @@ key value to the ov::Any and returns it. ### import_model() The importing of compiled model mechanism allows to import a previously exported backend specific model and wrap it -using an [CompiledModel](@ref openvino_docs_ie_plugin_dg_executable_network) object. This functionality is useful if +using an [CompiledModel](@ref openvino_docs_ov_plugin_dg_compiled_model) object. This functionality is useful if backend specific model compilation takes significant time and/or cannot be done on a target host device due to other reasons. @@ -167,4 +167,4 @@ OpenVINO plugin library must export only one function creating a plugin instance @snippet template/src/plugin.cpp plugin:create_plugin_engine -Next step in a plugin library implementation is the [CompiledModel](@ref openvino_docs_ie_plugin_dg_executable_network) class. +Next step in a plugin library implementation is the [CompiledModel](@ref openvino_docs_ov_plugin_dg_compiled_model) class. diff --git a/docs/IE_PLUGIN_DG/PluginTesting.md b/docs/IE_PLUGIN_DG/PluginTesting.md index f8458ae5171084..ca19d5ea2fbbae 100644 --- a/docs/IE_PLUGIN_DG/PluginTesting.md +++ b/docs/IE_PLUGIN_DG/PluginTesting.md @@ -1,14 +1,14 @@ -# Plugin Testing {#openvino_docs_ie_plugin_dg_plugin_testing} +# Plugin Testing {#openvino_docs_ov_plugin_dg_plugin_testing} OpenVINO tests infrastructure provides a predefined set of functional tests and utilities. They are used to verify a plugin using the OpenVINO public API. All the tests are written in the [Google Test C++ framework](https://github.com/google/googletest). OpenVINO Plugin tests are included in the `openvino::funcSharedTests` CMake target which is built within the OpenVINO repository -(see [Build Plugin Using CMake](@ref openvino_docs_ie_plugin_dg_plugin_build) guide). This library contains tests definitions (the tests bodies) which can be parametrized and instantiated in plugins depending on whether a plugin supports a particular feature, specific sets of parameters for test on supported operation set and so on. +(see [Build Plugin Using CMake](@ref openvino_docs_ov_plugin_dg_plugin_build) guide). This library contains tests definitions (the tests bodies) which can be parametrized and instantiated in plugins depending on whether a plugin supports a particular feature, specific sets of parameters for test on supported operation set and so on. Test definitions are split into tests class declaration (see `src/tests/functional/plugin/shared/include`) and tests class implementation (see `src/tests/functional/plugin/shared/src`) and include the following scopes of plugin conformance tests: -1. **Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple executable networks support, multiple synchronous and asynchronous inference requests support, and so on. See the next section with details how to instantiate the tests definition class with plugin-specific parameters. +1. 
**Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple compiled models support, multiple synchronous and asynchronous inference requests support, and so on. See the next section for details on how to instantiate the tests definition class with plugin-specific parameters. 2. **Single layer tests** (`single_layer_tests` sub-folder). This groups of tests checks that a particular single layer can be inferenced on a device. An example of test instantiation based on test definition from `openvino::funcSharedTests` library: @@ -35,7 +35,7 @@ To use these tests for your own plugin development, link the `openvino::funcShar > **NOTE**: A plugin may contain its own tests for use cases that are specific to hardware or need to be extensively tested. To build test binaries together with other build artifacts, use the `make all` command. For details, see -[Build Plugin Using CMake*](@ref openvino_docs_ie_plugin_dg_plugin_build). +[Build Plugin Using CMake*](@ref openvino_docs_ov_plugin_dg_plugin_build). ### How to Extend OpenVINO Plugin Tests diff --git a/docs/IE_PLUGIN_DG/QuantizedNetworks.md b/docs/IE_PLUGIN_DG/QuantizedNetworks.md index 57deb94281de05..f3c712e2f618b5 100644 --- a/docs/IE_PLUGIN_DG/QuantizedNetworks.md +++ b/docs/IE_PLUGIN_DG/QuantizedNetworks.md @@ -1,8 +1,8 @@ -# Quantized networks compute and restrictions {#openvino_docs_ie_plugin_dg_quantized_networks} +# Quantized models compute and restrictions {#openvino_docs_ov_plugin_dg_quantized_models} -One of the feature of Inference Engine is the support of quantized networks with different precisions: INT8, INT4, etc. +One of the features of OpenVINO is the support of quantized models with different precisions: INT8, INT4, etc. However, it is up to the plugin to define what exact precisions are supported by the particular HW. -All quantized networks which can be expressed in IR have a unified representation by means of *FakeQuantize* operation. +All quantized models which can be expressed in IR have a unified representation by means of *FakeQuantize* operation. For more details about low-precision model representation please refer to this [document](@ref openvino_docs_ie_plugin_dg_lp_representation). ### Interpreting FakeQuantize at runtime @@ -44,6 +44,6 @@ Below we define these rules as follows: - Per-channel quantization of activations for channel-wise and element-wise operations, e.g. Depthwise Convolution, Eltwise Add/Mul, ScaleShift. - Symmetric and asymmetric quantization of weights and activations with the support of per-channel scales and zero-points. - Non-unified quantization parameters for Eltwise and Concat operations. -- Non-quantized network output, i.e. there are no quantization parameters for it. +- Non-quantized model output, i.e. there are no quantization parameters for it. [qdq_propagation]: images/qdq_propagation.png diff --git a/docs/IE_PLUGIN_DG/RemoteContext.md b/docs/IE_PLUGIN_DG/RemoteContext.md new file mode 100644 index 00000000000000..24d4db410ba6fd --- /dev/null +++ b/docs/IE_PLUGIN_DG/RemoteContext.md @@ -0,0 +1,49 @@ +# Remote Context {#openvino_docs_ov_plugin_dg_remote_context} + +ov::RemoteContext class functionality: +- Represents device specific inference context. +- Allows to create remote device specific tensors. + +> **NOTE**: If a plugin provides a public API for its own Remote Context, the API should be header-only and must not depend on the plugin library. 
+ + +RemoteContext Class +------------------------ + +OpenVINO Plugin API provides the interface ov::IRemoteContext which should be used as a base class for a plugin specific remote context. Based on that, a declaration of an compiled model class can look as follows: + +@snippet src/remote_context.hpp remote_context:header + +### Class Fields + +The example class has several fields: + +- `m_name` - Device name. +- `m_property` - Device specific context properties. It can be used to cast RemoteContext to device specific type. + +### RemoteContext Constructor + +This constructor should initialize the remote context device name and properties. + +@snippet src/remote_context.cpp remote_context:ctor + +### get_device_name() + +The function returns the device name from the remote context. + +@snippet src/remote_context.cpp remote_context:get_device_name + +### get_property() + +The implementation returns the remote context properties. + +@snippet src/remote_context.cpp remote_context:get_property + + +### create_tensor() + +The method creates device specific remote tensor. + +@snippet src/remote_context.cpp remote_context:create_tensor + +The next step to support device specific tensors is a creation of device specific [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) class. diff --git a/docs/IE_PLUGIN_DG/RemoteTensor.md b/docs/IE_PLUGIN_DG/RemoteTensor.md new file mode 100644 index 00000000000000..b708b0d918e1ea --- /dev/null +++ b/docs/IE_PLUGIN_DG/RemoteTensor.md @@ -0,0 +1,87 @@ +# Remote Tensor {#openvino_docs_ov_plugin_dg_remote_tensor} + +ov::RemoteTensor class functionality: +- Provide an interface to work with device specific memory. + +> **NOTE**: If plugin provides a public API for own Remote Tensor, the API should be header only and doesn't depend on the plugin library. + + +Device Specific Remote Tensor Public API +------------------------------------------ + +The public interface to work with device specific remote tensors should have header only implementation and doesn't depend on the plugin library. + +@snippet include/template/remote_tensor.hpp remote_tensor:public_header + +The implementation below has several methods: + +### type_check() + +Static method is used to understand that some abstract remote tensor can be casted to this particular remote tensor type. + +### get_data() + +The set of methods (specific for the example, other implementation can have another API) which are helpers to get an access to remote data. + +Device Specific Internal tensor implementation +----------------------------------------------- + +The plugin should have the internal implementation of remote tensor which can communicate with public API. +The example contains the implementation of remote tensor which wraps memory from stl vector. + +OpenVINO Plugin API provides the interface ov::IRemoteTensor which should be used as a base class for remote tensors. + +The example implementation have two remote tensor classes: + + - Internal type dependent implementation which has as an template argument the vector type and create the type specific tensor. + - The type independent implementation which works with type dependent tensor inside. + +Based on that, an implementation of a type independent remote tensor class can look as follows: + +@snippet src/remote_context.cpp vector_impl:implementation + +The implementation provides a helper to get wrapped stl tensor and overrides all important methods of ov::IRemoteTensor class and recall the type dependent implementation. 
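+
+As a condensed sketch of that delegation pattern (illustrative only, with names borrowed from the snippet above; the exact ov::IRemoteTensor virtual signatures may differ between OpenVINO releases), the type independent wrapper can be pictured as:
+
+```cpp
+#include <memory>
+#include <string>
+
+#include <openvino/runtime/iremote_tensor.hpp>
+
+// Type independent wrapper: keeps the type dependent tensor as ov::IRemoteTensor
+// and forwards every interface call to it.
+class VectorImpl : public ov::IRemoteTensor {
+public:
+    explicit VectorImpl(std::shared_ptr<ov::IRemoteTensor> tensor) : m_tensor(std::move(tensor)) {}
+
+    const ov::element::Type& get_element_type() const override { return m_tensor->get_element_type(); }
+    const ov::Shape& get_shape() const override { return m_tensor->get_shape(); }
+    const ov::Strides& get_strides() const override { return m_tensor->get_strides(); }
+    void set_shape(ov::Shape shape) override { m_tensor->set_shape(std::move(shape)); }
+    const ov::AnyMap& get_properties() const override { return m_tensor->get_properties(); }
+    const std::string& get_device_name() const override { return m_tensor->get_device_name(); }
+
+    // The real implementation additionally exposes a helper that returns the wrapped
+    // std::vector from the type dependent tensor; it is omitted here for brevity.
+
+private:
+    std::shared_ptr<ov::IRemoteTensor> m_tensor;
+};
+```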
+ +The type dependent remote tensor has the next implementation: + +@snippet src/remote_context.cpp vector_impl_t:implementation + +### Class Fields + +The class has several fields: + +- `m_element_type` - Tensor element type. +- `m_shape` - Tensor shape. +- `m_strides` - Tensor strides. +- `m_data` - Wrapped vector. +- `m_dev_name` - Device name. +- `m_properties` - Remote tensor specific properties which can be used to detect the type of the remote tensor. + +### VectorTensorImpl() + +The constructor of remote tensor implementation. Creates a vector with data, initialize device name and properties, updates shape, element type and strides. + + +### get_element_type() + +The method returns tensor element type. + +### get_shape() + +The method returns tensor shape. + +### get_strides() + +The method returns tensor strides. + +### set_shape() + +The method allows to set new shapes for the remote tensor. + +### get_properties() + +The method returns tensor specific properties. + +### get_device_name() + +The method returns tensor specific device name. diff --git a/docs/IE_PLUGIN_DG/detailed_guides.md b/docs/IE_PLUGIN_DG/detailed_guides.md index 934c53cc1e2ca5..2076afb0d94447 100644 --- a/docs/IE_PLUGIN_DG/detailed_guides.md +++ b/docs/IE_PLUGIN_DG/detailed_guides.md @@ -6,13 +6,13 @@ :maxdepth: 1 :hidden: - openvino_docs_ie_plugin_dg_quantized_networks + openvino_docs_ov_plugin_dg_quantized_models openvino_docs_OV_UG_lpt @endsphinxdirective The guides below provides extra information about specific features of OpenVINO needed for understanding during OpenVINO plugin development: -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide diff --git a/docs/IE_PLUGIN_DG/layout.xml b/docs/IE_PLUGIN_DG/layout.xml index 1f4120d195f4f9..44137896ee794b 100644 --- a/docs/IE_PLUGIN_DG/layout.xml +++ b/docs/IE_PLUGIN_DG/layout.xml @@ -4,7 +4,7 @@ - + @@ -79,6 +79,8 @@ + + diff --git a/docs/MO_DG/prepare_model/Getting_performance_numbers.md b/docs/MO_DG/prepare_model/Getting_performance_numbers.md index fda387a95a31dd..567706a69e735f 100644 --- a/docs/MO_DG/prepare_model/Getting_performance_numbers.md +++ b/docs/MO_DG/prepare_model/Getting_performance_numbers.md @@ -1,109 +1,168 @@ # Getting Performance Numbers {#openvino_docs_MO_DG_Getting_Performance_Numbers} -This guide explains how to use the benchmark_app to get performance numbers. It also explains how the performance numbers are reflected through internal inference performance counters and execution graphs. It also includes information on using ITT and Intel® VTune™ Profiler to get performance insights. -## Test performance with the benchmark_app +@sphinxdirective -### Prerequisites +This guide explains how to use the benchmark_app to get performance numbers. It also explains how the performance +numbers are reflected through internal inference performance counters and execution graphs. It also includes +information on using ITT and Intel® VTune™ Profiler to get performance insights. -To run benchmarks, you need both OpenVINO developer tools and Runtime installed. Follow the [Installation guide](../../install_guides/installing-model-dev-tools.md) and make sure to install the latest general release package with support for frameworks of the models you want to test. 
+Test performance with the benchmark_app +########################################################### -To test performance of your model, make sure you [prepare the model for use with OpenVINO](../../Documentation/model_introduction.md). For example, if you use [OpenVINO's automation tools](@ref omz_tools_downloader), these two lines of code will download the resnet-50-tf and convert it to OpenVINO IR. +Prerequisites ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +To run benchmarks, you need both OpenVINO developer tools and Runtime installed. Follow the +:doc:`Installation guide ` and make sure to install the latest +general release package with support for frameworks of the models you want to test. + +To test performance of your model, make sure you :doc:`prepare the model for use with OpenVINO `. +For example, if you use :doc:`OpenVINO's automation tools `, these two lines of code will download the +resnet-50-tf and convert it to OpenVINO IR. + +.. code-block:: bash -```bash omz_downloader --name resnet-50-tf omz_converter --name resnet-50-tf -``` -### Running the benchmark application -For a detailed description, see the dedicated articles: [benchmark_app for C++](../../../samples/cpp/benchmark_app/README.md) and [benchmark_app for Python](../../../tools/benchmark_tool/README.md). +Running the benchmark application ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +For a detailed description, see the dedicated articles: +:doc:`benchmark_app for C++ ` and +:doc:`benchmark_app for Python `. The benchmark_app includes a lot of device-specific options, but the primary usage is as simple as: -```bash +.. code-block:: bash + benchmark_app -m -d -i -``` -Each of the [OpenVINO supported devices](../../OV_Runtime_UG/supported_plugins/Supported_Devices.md) offers performance settings that contain command-line equivalents in the Benchmark app. -While these settings provide really low-level control for the optimal model performance on the _specific_ device, it is recommended to always start performance evaluation with the [OpenVINO High-Level Performance Hints](../../OV_Runtime_UG/performance_hints.md) first, like so: +Each of the :doc:`OpenVINO supported devices ` offers +performance settings that contain command-line equivalents in the Benchmark app. + +While these settings provide really low-level control for the optimal model performance on the *specific* device, +it is recommended to always start performance evaluation with the :doc:`OpenVINO High-Level Performance Hints ` first, like so: + +.. code-block:: bash -```bash # for throughput prioritization benchmark_app -hint tput -m -d # for latency prioritization benchmark_app -hint latency -m -d -``` -## Additional benchmarking considerations -### 1 - Select a Proper Set of Operations to Measure +Additional benchmarking considerations +########################################################### + +1 - Select a Proper Set of Operations to Measure ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ When evaluating performance of a model with OpenVINO Runtime, it is required to measure a proper set of operations. - Avoid including one-time costs such as model loading. - Track operations that occur outside OpenVINO Runtime (such as video decoding) separately. -> **NOTE**: Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. 
For more information, refer to [Embedding the Pre-processing](Additional_Optimizations.md) and [General Runtime Optimizations](../../optimization_guide/dldt_deployment_optimization_common.md). -### 2 - Try to Get Credible Data +.. note:: + + Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. For more information, + refer to :doc:`Embedding Pre-processing ` and + :doc:`General Runtime Optimizations `. + -Performance conclusions should be build upon reproducible data. As for the performance measurements, they should be done with a large number of invocations of the same routine. Since the first iteration is almost always significantly slower than the subsequent ones, an aggregated value can be used for the execution time for final projections: +2 - Try to Get Credible Data ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations and then average or find a mean of the results. -- If the time values range too much, consider geomean. -- Be aware of the throttling and other power oddities. A device can exist in one of several different power states. When optimizing your model, consider fixing the device frequency for better performance data reproducibility. However, the end-to-end (application) benchmarking should also be performed under real operational conditions. +Performance conclusions should be build upon reproducible data. As for the performance measurements, they should +be done with a large number of invocations of the same routine. Since the first iteration is almost always significantly +slower than the subsequent ones, an aggregated value can be used for the execution time for final projections: +- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations + and then average or find a mean of the results. +- If the time values range too much, consider geomean. +- Be aware of the throttling and other power oddities. A device can exist in one of several different power states. + When optimizing your model, consider fixing the device frequency for better performance data reproducibility. + However, the end-to-end (application) benchmarking should also be performed under real operational conditions. -### 3 - Compare Performance with Native/Framework Code + +3 - Compare Performance with Native/Framework Code ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ When comparing the OpenVINO Runtime performance with the framework or another reference code, make sure that both versions are as similar as possible: -- Wrap the exact inference execution (refer to the [Benchmark app](../../../samples/cpp/benchmark_app/README.md) for examples). +- Wrap the exact inference execution (for examples, see :doc:`Benchmark app `). - Do not include model loading time. - Ensure that the inputs are identical for OpenVINO Runtime and the framework. For example, watch out for random values that can be used to populate the inputs. -- In situations when any user-side pre-processing should be tracked separately, consider [image pre-processing and conversion](../../OV_Runtime_UG/preprocessing_overview.md). -- When applicable, leverage the [Dynamic Shapes support](../../OV_Runtime_UG/ov_dynamic_shapes.md). -- If possible, demand the same accuracy. For example, TensorFlow allows `FP16` execution, so when comparing to that, make sure to test the OpenVINO Runtime with the `FP16` as well. 
+- In situations when any user-side pre-processing should be tracked separately, consider :doc:`image pre-processing and conversion `. +- When applicable, leverage the :doc:`Dynamic Shapes support `. +- If possible, demand the same accuracy. For example, TensorFlow allows ``FP16`` execution, so when comparing to that, make sure to test the OpenVINO Runtime with the ``FP16`` as well. -### Internal Inference Performance Counters and Execution Graphs +Internal Inference Performance Counters and Execution Graphs ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ More detailed insights into inference performance breakdown can be achieved with device-specific performance counters and/or execution graphs. -Both [C++](../../../samples/cpp/benchmark_app/README.md) and [Python](../../../tools/benchmark_tool/README.md) versions of the `benchmark_app` support a `-pc` command-line parameter that outputs internal execution breakdown. - -For example, the table shown below is part of performance counters for quantized [TensorFlow implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model inference on [CPU Plugin](../../OV_Runtime_UG/supported_plugins/CPU.md). -Keep in mind that since the device is CPU, the `realTime` wall clock and the `cpu` time layers are the same. Information about layer precision is also stored in the performance counters. - -| layerName | execStatus | layerType | execType | realTime (ms) | cpuTime (ms) | -| --------------------------------------------------------- | ---------- | ------------ | -------------------- | ------------- | ------------ | -| resnet\_model/batch\_normalization\_15/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.377 | 0.377 | -| resnet\_model/conv2d\_16/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 | -| resnet\_model/batch\_normalization\_16/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_I8 | 0.499 | 0.499 | -| resnet\_model/conv2d\_17/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 | -| resnet\_model/batch\_normalization\_17/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.399 | 0.399 | -| resnet\_model/add\_4/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 | -| resnet\_model/add\_4 | NOT\_RUN | Eltwise | undef | 0 | 0 | -| resnet\_model/add\_5/fq\_input\_1 | NOT\_RUN | FakeQuantize | undef | 0 | 0 | - - - The `exeStatus` column of the table includes the following possible values: - - `EXECUTED` - the layer was executed by standalone primitive. - - `NOT_RUN` - the layer was not executed by standalone primitive or was fused with another operation and executed in another layer primitive. - - The `execType` column of the table includes inference primitives with specific suffixes. The layers could have the following marks: - * The `I8` suffix is for layers that had 8-bit data type input and were computed in 8-bit precision. - * The `FP32` suffix is for layers computed in 32-bit precision. - - All `Convolution` layers are executed in `int8` precision. The rest of the layers are fused into Convolutions using post-operation optimization, as described in [CPU Device](../../OV_Runtime_UG/supported_plugins/CPU.md). - This contains layer names (as seen in OpenVINO IR), type of the layer, and execution statistics. - -Both `benchmark_app` versions also support the `exec_graph_path` command-line option. 
It requires OpenVINO to output the same execution statistics per layer, but in the form of plugin-specific [Netron-viewable](https://netron.app/) graph to the specified file. - -Especially when performance-debugging the [latency](../../optimization_guide/dldt_deployment_optimization_latency.md), note that the counters do not reflect the time spent in the `plugin/device/driver/etc` queues. If the sum of the counters is too different from the latency of an inference request, consider testing with less inference requests. For example, running single [OpenVINO stream](../../optimization_guide/dldt_deployment_optimization_tput.md) with multiple requests would produce nearly identical counters as running a single inference request, while the actual latency can be quite different. - -Lastly, the performance statistics with both performance counters and execution graphs are averaged, so such data for the [inputs of dynamic shapes](../../OV_Runtime_UG/ov_dynamic_shapes.md) should be measured carefully, preferably by isolating the specific shape and executing multiple times in a loop, to gather the reliable data. +Both :doc:`C++ ` and :doc:`Python ` +versions of the *benchmark_app* support a ``-pc`` command-line parameter that outputs internal execution breakdown. + +For example, the table shown below is part of performance counters for quantized +`TensorFlow implementation of ResNet-50 `__ +model inference on :doc:`CPU Plugin `. +Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time layers are the same. +Information about layer precision is also stored in the performance counters. + + +=========================================================== ============= ============== ===================== ================= ============== + layerName execStatus layerType execType realTime (ms) cpuTime (ms) +=========================================================== ============= ============== ===================== ================= ============== + resnet\_model/batch\_normalization\_15/FusedBatchNorm/Add EXECUTED Convolution jit\_avx512\_1x1\_I8 0.377 0.377 + resnet\_model/conv2d\_16/Conv2D/fq\_input\_0 NOT\_RUN FakeQuantize undef 0 0 + resnet\_model/batch\_normalization\_16/FusedBatchNorm/Add EXECUTED Convolution jit\_avx512\_I8 0.499 0.499 + resnet\_model/conv2d\_17/Conv2D/fq\_input\_0 NOT\_RUN FakeQuantize undef 0 0 + resnet\_model/batch\_normalization\_17/FusedBatchNorm/Add EXECUTED Convolution jit\_avx512\_1x1\_I8 0.399 0.399 + resnet\_model/add\_4/fq\_input\_0 NOT\_RUN FakeQuantize undef 0 0 + resnet\_model/add\_4 NOT\_RUN Eltwise undef 0 0 + resnet\_model/add\_5/fq\_input\_1 NOT\_RUN FakeQuantize undef 0 0 +=========================================================== ============= ============== ===================== ================= ============== + +| The ``exeStatus`` column of the table includes the following possible values: +| - ``EXECUTED`` - the layer was executed by standalone primitive. +| - ``NOT_RUN`` - the layer was not executed by standalone primitive or was fused with another operation and executed in another layer primitive. +| +| The ``execType`` column of the table includes inference primitives with specific suffixes. The layers could have the following marks: +| - The ``I8`` suffix is for layers that had 8-bit data type input and were computed in 8-bit precision. +| - The ``FP32`` suffix is for layers computed in 32-bit precision. +| +| All ``Convolution`` layers are executed in ``int8`` precision. 
The rest of the layers are fused into Convolutions using post-operation optimization, + as described in :doc:`CPU Device `. This contains layer names + (as seen in OpenVINO IR), type of the layer, and execution statistics. + + +Both *benchmark_app* versions also support the ``exec_graph_path`` command-line option. It requires OpenVINO to output the same execution +statistics per layer, but in the form of plugin-specific `Netron-viewable `__ graph to the specified file. + +Especially when performance-debugging the :doc:`latency `, note that the counters +do not reflect the time spent in the ``plugin/device/driver/etc`` queues. If the sum of the counters is too different from the latency +of an inference request, consider testing with less inference requests. For example, running single +:doc:`OpenVINO stream ` with multiple requests would produce nearly identical +counters as running a single inference request, while the actual latency can be quite different. + +Lastly, the performance statistics with both performance counters and execution graphs are averaged, +so such data for the :doc:`inputs of dynamic shapes ` should be measured carefully, +preferably by isolating the specific shape and executing multiple times in a loop, to gather reliable data. + +Use ITT to Get Performance Insights ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +In general, OpenVINO and its individual plugins are heavily instrumented with Intel® Instrumentation and Tracing Technology (ITT). +Therefore, you can also compile OpenVINO from the source code with ITT enabled and use tools like +`Intel® VTune™ Profiler `__ to get detailed inference performance breakdown and additional +insights in the application-level performance on the timeline view. + + +@endsphinxdirective -### Use ITT to Get Performance Insights -In general, OpenVINO and its individual plugins are heavily instrumented with Intel® Instrumentation and Tracing Technology (ITT). Therefore, you can also compile OpenVINO from the source code with ITT enabled and use tools like [Intel® VTune™ Profiler](https://software.intel.com/en-us/vtune) to get detailed inference performance breakdown and additional insights in the application-level performance on the timeline view. diff --git a/docs/OV_Runtime_UG/Model_caching_overview.md b/docs/OV_Runtime_UG/Model_caching_overview.md index fa407853c07234..ab9cfce654a546 100644 --- a/docs/OV_Runtime_UG/Model_caching_overview.md +++ b/docs/OV_Runtime_UG/Model_caching_overview.md @@ -1,16 +1,18 @@ # Model Caching Overview {#openvino_docs_OV_UG_Model_caching_overview} -As described in the [Integrate OpenVINO™ with Your Application](integrate_with_your_application.md), a common application flow consists of the following steps: +@sphinxdirective + +As described in the :doc:`Integrate OpenVINO™ with Your Application `, a common application flow consists of the following steps: 1. **Create a Core object**: First step to manage available devices and read model objects -2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `ov::Model` +2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `ov::Model `__ 3. **Prepare inputs and outputs**: If needed, manipulate precision, memory layout, size or color format 4. **Set configuration**: Pass device-specific loading configurations to the device -5. **Compile and Load Network to device**: Use the `ov::Core::compile_model()` method with a specific device +5. 
**Compile and Load Network to device**: Use the `ov::Core::compile_model() `__ method with a specific device 6. **Set input data**: Specify input tensor @@ -18,14 +20,14 @@ As described in the [Integrate OpenVINO™ with Your Application](integrate_with Step 5 can potentially perform several time-consuming device-specific optimizations and network compilations, and such delays can lead to a bad user experience on application startup. To avoid this, some devices offer -import/export network capability, and it is possible to either use the [Compile tool](../../tools/compile_tool/README.md) +import/export network capability, and it is possible to either use the :doc:`Compile tool ` or enable model caching to export compiled model automatically. Reusing cached model can significantly reduce compile model time. -### Set "cache_dir" config option to enable model caching +Set "cache_dir" config option to enable model caching ++++++++++++++++++++++++++++++++++++++++++++++++++++++ To enable model caching, the application must specify a folder to store cached blobs, which is done like this: -@sphinxdirective .. tab:: C++ @@ -39,23 +41,24 @@ To enable model caching, the application must specify a folder to store cached b :language: python :fragment: [ov:caching:part0] -@endsphinxdirective -With this code, if the device specified by `device_name` supports import/export model capability, a cached blob is automatically created inside the `/path/to/cache/dir` folder. +With this code, if the device specified by ``device_name`` supports import/export model capability, a cached blob is automatically created inside the ``/path/to/cache/dir`` folder. If the device does not support import/export capability, cache is not created and no error is thrown. Depending on your device, total time for compiling model on application startup can be significantly reduced. -Also note that the very first `compile_model` (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file: +Also note that the very first ``compile_model`` (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file: -![](../img/caching_enabled.svg) -### Even faster: use compile_model(modelPath) +.. image:: _static/images/caching_enabled.svg + + +Even faster: use compile_model(modelPath) ++++++++++++++++++++++++++++++++++++++++++ In some cases, applications do not need to customize inputs and outputs every time. Such application always -call `model = core.read_model(...)`, then `core.compile_model(model, ..)` and it can be further optimized. +call ``model = core.read_model(...)``, then ``core.compile_model(model, ..)`` and it can be further optimized. For these cases, there is a more convenient API to compile the model in a single call, skipping the read step: -@sphinxdirective .. tab:: C++ @@ -69,11 +72,9 @@ For these cases, there is a more convenient API to compile the model in a single :language: python :fragment: [ov:caching:part1] -@endsphinxdirective -With model caching enabled, total load time is even smaller, if `read_model` is optimized as well. +With model caching enabled, total load time is even smaller, if ``read_model`` is optimized as well. -@sphinxdirective .. tab:: C++ @@ -87,16 +88,15 @@ With model caching enabled, total load time is even smaller, if `read_model` is :language: python :fragment: [ov:caching:part2] -@endsphinxdirective -![](../img/caching_times.svg) +.. 
image:: _static/images/caching_times.svg -### Advanced Examples +Advanced Examples +++++++++++++++++++++ Not every device supports network import/export capability. For those that don't, enabling caching has no effect. To check in advance if a particular device supports model caching, your application can use the following code: -@sphinxdirective .. tab:: C++ @@ -110,8 +110,9 @@ To check in advance if a particular device supports model caching, your applicat :language: python :fragment: [ov:caching:part3] -@endsphinxdirective -> **NOTE**: For GPU, model caching is currently implemented as a preview feature. Before it is fully supported, kernel caching can be used in the same manner: by setting the CACHE_DIR configuration key to a folder where the cache should be stored (see the [GPU plugin documentation](supported_plugins/GPU.md)). -> To activate the preview feature of model caching, set the OV_GPU_CACHE_MODEL environment variable to 1. - +.. note:: + + For GPU, model caching is currently implemented as a preview feature. Before it is fully supported, kernel caching can be used in the same manner: by setting the CACHE_DIR configuration key to a folder where the cache should be stored (see the :doc:`GPU plugin documentation `). To activate the preview feature of model caching, set the OV_GPU_CACHE_MODEL environment variable to 1. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/Python_API_exclusives.md b/docs/OV_Runtime_UG/Python_API_exclusives.md index 21a7dbc9f26b78..06d8bce1ddf343 100644 --- a/docs/OV_Runtime_UG/Python_API_exclusives.md +++ b/docs/OV_Runtime_UG/Python_API_exclusives.md @@ -2,111 +2,179 @@ OpenVINO™ Runtime Python API offers additional features and helpers to enhance user experience. The main goal of Python API is to provide user-friendly and simple yet powerful tool for Python users. -## Easier Model Compilation +Easier Model Compilation +######################## -`CompiledModel` can be easily created with the helper method. It hides the creation of `Core` and applies `AUTO` inference mode by default. +``CompiledModel`` can be easily created with the helper method. It hides the creation of ``Core`` and applies ``AUTO`` inference mode by default. -@snippet docs/snippets/ov_python_exclusives.py auto_compilation -## Model/CompiledModel Inputs and Outputs +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [auto_compilation] -Besides functions aligned to C++ API, some of them have their Python counterparts or extensions. For example, `Model` and `CompiledModel` inputs/outputs can be accessed via properties. -@snippet docs/snippets/ov_python_exclusives.py properties_example +Model/CompiledModel Inputs and Outputs +###################################### + +Besides functions aligned to C++ API, some of them have their Python counterparts or extensions. For example, ``Model`` and ``CompiledModel`` inputs/outputs can be accessed via properties. + + +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [properties_example] + Refer to Python API documentation on which helper functions or properties are available for different classes. -## Working with Tensor +Working with Tensor +#################### + +Python API allows passing data as tensors. The ``Tensor`` object holds a copy of the data from the given array. The ``dtype`` of *numpy* arrays is converted to OpenVINO™ types automatically. + + +.. 
doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [tensor_basics] -Python API allows passing data as tensors. The `Tensor` object holds a copy of the data from the given array. The `dtype` of *numpy* arrays is converted to OpenVINO™ types automatically. -@snippet docs/snippets/ov_python_exclusives.py tensor_basics +Shared Memory Mode +++++++++++++++++++ -### Shared Memory Mode +``Tensor`` objects can share the memory with *numpy* arrays. By specifying the ``shared_memory`` argument, the ``Tensor`` object does not copy data. Instead, it has access to the memory of the *numpy* array. -`Tensor` objects can share the memory with *numpy* arrays. By specifying the `shared_memory` argument, the `Tensor` object does not copy data. Instead, it has access to the memory of the *numpy* array. -@snippet docs/snippets/ov_python_exclusives.py tensor_shared_mode +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [tensor_shared_mode] -## Running Inference + +Running Inference +#################### Python API supports extra calling methods to synchronous and asynchronous modes for inference. All infer methods allow users to pass data as popular *numpy* arrays, gathered in either Python dicts or lists. -@snippet docs/snippets/ov_python_exclusives.py passing_numpy_array + +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [passing_numpy_array] + Results from inference can be obtained in various ways: -@snippet docs/snippets/ov_python_exclusives.py getting_results -### Synchronous Mode - Extended +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [getting_results] + + +Synchronous Mode - Extended ++++++++++++++++++++++++++++ Python API provides different synchronous calls to infer model, which block the application execution. Additionally, these calls return results of inference: -@snippet docs/snippets/ov_python_exclusives.py sync_infer -### AsyncInferQueue +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [sync_infer] + + +AsyncInferQueue +++++++++++++++++++++ -Asynchronous mode pipelines can be supported with a wrapper class called `AsyncInferQueue`. This class automatically spawns the pool of `InferRequest` objects (also called "jobs") and provides synchronization mechanisms to control the flow of the pipeline. +Asynchronous mode pipelines can be supported with a wrapper class called ``AsyncInferQueue``. This class automatically spawns the pool of ``InferRequest`` objects (also called "jobs") and provides synchronization mechanisms to control the flow of the pipeline. -Each job is distinguishable by a unique `id`, which is in the range from 0 up to the number of jobs specified in the `AsyncInferQueue` constructor. +Each job is distinguishable by a unique ``id``, which is in the range from 0 up to the number of jobs specified in the ``AsyncInferQueue`` constructor. -The `start_async` function call is not required to be synchronized - it waits for any available job if the queue is busy/overloaded. Every `AsyncInferQueue` code block should end with the `wait_all` function which provides the "global" synchronization of all jobs in the pool and ensure that access to them is safe. +The ``start_async`` function call is not required to be synchronized - it waits for any available job if the queue is busy/overloaded. 
Every ``AsyncInferQueue`` code block should end with the ``wait_all`` function which provides the "global" synchronization of all jobs in the pool and ensure that access to them is safe. -@snippet docs/snippets/ov_python_exclusives.py asyncinferqueue -#### Acquiring Results from Requests +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [asyncinferqueue] -After the call to `wait_all`, jobs and their data can be safely accessed. Acquiring a specific job with `[id]` will return the `InferRequest` object, which will result in seamless retrieval of the output data. -@snippet docs/snippets/ov_python_exclusives.py asyncinferqueue_access +Acquiring Results from Requests +------------------------------- -#### Setting Callbacks +After the call to ``wait_all``, jobs and their data can be safely accessed. Acquiring a specific job with ``[id]`` will return the ``InferRequest`` object, which will result in seamless retrieval of the output data. -Another feature of `AsyncInferQueue` is the ability to set callbacks. When callback is set, any job that ends inference calls upon the Python function. The callback function must have two arguments: one is the request that calls the callback, which provides the `InferRequest` API; the other is called "userdata", which provides the possibility of passing runtime values. Those values can be of any Python type and later used within the callback function. -The callback of `AsyncInferQueue` is uniform for every job. When executed, GIL is acquired to ensure safety of data manipulation inside the function. +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [asyncinferqueue_access] -@snippet docs/snippets/ov_python_exclusives.py asyncinferqueue_set_callback -### Working with u1, u4 and i4 Element Types +Setting Callbacks +-------------------- + +Another feature of ``AsyncInferQueue`` is the ability to set callbacks. When callback is set, any job that ends inference calls upon the Python function. The callback function must have two arguments: one is the request that calls the callback, which provides the ``InferRequest`` API; the other is called "userdata", which provides the possibility of passing runtime values. Those values can be of any Python type and later used within the callback function. + +The callback of ``AsyncInferQueue`` is uniform for every job. When executed, GIL is acquired to ensure safety of data manipulation inside the function. + + +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [asyncinferqueue_set_callback] + + +Working with u1, u4 and i4 Element Types +++++++++++++++++++++++++++++++++++++++++ Since OpenVINO™ supports low precision element types, there are a few ways to handle them in Python. To create an input tensor with such element types, you may need to pack your data in the new *numpy* array, with which the byte size matches the original input size: -@snippet docs/snippets/ov_python_exclusives.py packing_data + + +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [packing_data] + To extract low precision values from a tensor into the *numpy* array, you can use the following helper: -@snippet docs/snippets/ov_python_exclusives.py unpacking -### Release of GIL + +.. 
doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [unpacking] + + +Release of GIL +++++++++++++++++++++ Some functions in Python API release the Global Lock Interpreter (GIL) while running work-intensive code. This can help you achieve more parallelism in your application, using Python threads. For more information about GIL, refer to the Python documentation. -@snippet docs/snippets/ov_python_exclusives.py releasing_gil - -> **NOTE**: While GIL is released, functions can still modify and/or operate on Python objects in C++. Hence, there is no reference counting. You should pay attention to thread safety in case sharing of these objects with another thread occurs. It might affect code only if multiple threads are spawned in Python. - -#### List of Functions that Release the GIL -- openvino.runtime.AsyncInferQueue.start_async -- openvino.runtime.AsyncInferQueue.is_ready -- openvino.runtime.AsyncInferQueue.wait_all -- openvino.runtime.AsyncInferQueue.get_idle_request_id -- openvino.runtime.CompiledModel.create_infer_request -- openvino.runtime.CompiledModel.infer_new_request -- openvino.runtime.CompiledModel.__call__ -- openvino.runtime.CompiledModel.export -- openvino.runtime.CompiledModel.get_runtime_model -- openvino.runtime.Core.compile_model -- openvino.runtime.Core.read_model -- openvino.runtime.Core.import_model -- openvino.runtime.Core.query_model -- openvino.runtime.Core.get_available_devices -- openvino.runtime.InferRequest.infer -- openvino.runtime.InferRequest.start_async -- openvino.runtime.InferRequest.wait -- openvino.runtime.InferRequest.wait_for -- openvino.runtime.InferRequest.get_profiling_info -- openvino.runtime.InferRequest.query_state -- openvino.runtime.Model.reshape -- openvino.preprocess.PrePostProcessor.build + +.. doxygensnippet:: docs/snippets/ov_python_exclusives.py + :language: cpp + :fragment: [releasing_gil] + + +.. note:: While GIL is released, functions can still modify and/or operate on Python objects in C++. Hence, there is no reference counting. You should pay attention to thread safety in case sharing of these objects with another thread occurs. It might affect code only if multiple threads are spawned in Python. 
+ + +List of Functions that Release the GIL +-------------------------------------- + +* openvino.runtime.AsyncInferQueue.start_async +* openvino.runtime.AsyncInferQueue.is_ready +* openvino.runtime.AsyncInferQueue.wait_all +* openvino.runtime.AsyncInferQueue.get_idle_request_id +* openvino.runtime.CompiledModel.create_infer_request +* openvino.runtime.CompiledModel.infer_new_request +* openvino.runtime.CompiledModel.__call__ +* openvino.runtime.CompiledModel.export +* openvino.runtime.CompiledModel.get_runtime_model +* openvino.runtime.Core.compile_model +* openvino.runtime.Core.read_model +* openvino.runtime.Core.import_model +* openvino.runtime.Core.query_model +* openvino.runtime.Core.get_available_devices +* openvino.runtime.InferRequest.infer +* openvino.runtime.InferRequest.start_async +* openvino.runtime.InferRequest.wait +* openvino.runtime.InferRequest.wait_for +* openvino.runtime.InferRequest.get_profiling_info +* openvino.runtime.InferRequest.query_state +* openvino.runtime.Model.reshape +* openvino.preprocess.PrePostProcessor.build diff --git a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md index b48525217f267b..5fdd5a2112ac57 100644 --- a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md +++ b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md @@ -1,202 +1,184 @@ # Deploying Your Application with Deployment Manager {#openvino_docs_install_guides_deployment_manager_tool} -The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the `/tools/deployment_manager` directory after installation. +@sphinxdirective + +The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the ``/tools/deployment_manager`` directory after installation. This article provides instructions on how to create a package with Deployment Manager and then deploy the package to your target systems. -## Prerequisites +Prerequisites +#################### To use the Deployment Manager tool, the following requirements need to be met: -* Intel® Distribution of OpenVINO™ toolkit is installed. See the [Installation Guide](../../install_guides/installing-openvino-overview.md) for instructions on different operating systems. + +* Intel® Distribution of OpenVINO™ toolkit is installed. See the :doc:`Installation Guide ` for instructions on different operating systems. * To run inference on a target device other than CPU, device drivers must be pre-installed: - * **For GPU**, see [Configurations for Intel® Processor Graphics (GPU)](../../install_guides/configurations-for-intel-gpu.md). - * **For GNA**, see [Intel® Gaussian & Neural Accelerator (GNA)](../../install_guides/configurations-for-intel-gna.md) -> **IMPORTANT**: The operating system on the target system must be the same as the development system on which you are creating the package. 
For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + * **For GPU**, see :doc:`Configurations for Intel® Processor Graphics (GPU) `. + * **For GNA**, see :doc:`Intel® Gaussian & Neural Accelerator (GNA) ` + +.. important:: + + The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + +.. tip:: -> **TIP**: If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the ['--user_data' option](https://docs.openvino.ai/latest/openvino_docs_install_guides_deployment_manager_tool.html#run-standard-cli-mode) to add them to the deployment archive. Install these dependencies on the target host before running inference. + If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the `'--user_data' option <#running-deployment-manager-in-standard-cli-mode>`__ to add them to the deployment archive. Install these dependencies on the target host before running inference. -## Creating Deployment Package Using Deployment Manager +Creating Deployment Package Using Deployment Manager +#################################################### To create a deployment package that includes inference-related components of OpenVINO™ toolkit, you can run the Deployment Manager tool in either interactive or standard CLI mode . -### Running Deployment Manager in Interactive Mode +Running Deployment Manager in Interactive Mode +++++++++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective +.. dropdown:: Click to expand/collapse -.. raw:: html + The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. -
+ To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: -@endsphinxdirective + .. tab:: Linux -The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. + .. code-block:: sh -To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: - -@sphinxdirective - -.. tab:: Linux - - .. code-block:: sh - - cd /tools/deployment_manager - - ./deployment_manager.py - -.. tab:: Windows - - .. code-block:: bat - - cd \deployment_tools\tools\deployment_manager - .\deployment_manager.py - -.. tab:: macOS - - .. code-block:: sh - - cd /tools/deployment_manager - ./deployment_manager.py - -@endsphinxdirective + cd /tools/deployment_manager -The target device selection dialog is displayed: - -![Deployment Manager selection dialog](../img/selection_dialog.png) + ./deployment_manager.py -Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. tab:: Windows -Once the selection is accepted, the package generation dialog will appear: - -![Deployment Manager configuration dialog](../img/configuration_dialog.png) + .. code-block:: bat -The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: - -* `o. Change output directory` (optional): the path to the output directory. By default, it is set to your home directory. + cd \deployment_tools\tools\deployment_manager + .\deployment_manager.py -* `u. Provide (or change) path to folder with user data` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means that copying the user data to the target system need to be done separately. + .. tab:: macOS -* `t. Change archive name` (optional): the deployment archive name without extension. By default, it is set to `openvino_deployment_package`. - -After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. code-block:: sh -Once the script has successfully completed, the deployment package is generated in the specified output directory. + cd /tools/deployment_manager + ./deployment_manager.py -@sphinxdirective -.. raw:: html + The target device selection dialog is displayed: -
+ .. image:: _static/images/selection_dialog.png + :alt: Deployment Manager selection dialog -@endsphinxdirective + Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. -### Running Deployment Manager in Standard CLI Mode + Once the selection is accepted, the package generation dialog will appear: -@sphinxdirective + .. image:: _static/images/configuration_dialog.png + :alt: Deployment Manager configuration dialog -.. raw:: html + The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: -
+ * ``o. Change output directory`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``u. Provide (or change) path to folder with user data`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means that copying the user data to the target system need to be done separately. -You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. + * ``t. Change archive name`` (optional): the deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. -To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: + After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. -@sphinxdirective + Once the script has successfully completed, the deployment package is generated in the specified output directory. -.. tab:: Linux - .. code-block:: sh +Running Deployment Manager in Standard CLI Mode ++++++++++++++++++++++++++++++++++++++++++++++++ - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -.. tab:: Windows +.. dropdown:: Click to expand/collapse - .. code-block:: bat + You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. - cd \tools\deployment_manager - .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: -.. tab:: macOS + .. tab:: Linux - .. code-block:: sh + .. code-block:: sh - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -@endsphinxdirective + .. tab:: Windows -The following options are available: + .. code-block:: bat -* `<--targets>` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, `--targets cpu gpu`. -To get a list of currently available targets, run the program with the `-h` option. + cd \tools\deployment_manager + .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -* `[--output_dir]` (optional): the path to the output directory. By default, it is set to your home directory. + .. tab:: macOS -* `[--archive_name]` (optional): a deployment archive name without extension. By default, it is set to `openvino_deployment_package`. + .. code-block:: sh -* `[--user_data]` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) 
files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means copying the user data to the target system need to be performed separately. + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -Once the script has successfully completed, the deployment package is generated in the output directory specified. -@sphinxdirective + The following options are available: -.. raw:: html + * ``<--targets>`` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, ``--targets cpu gpu``. + To get a list of currently available targets, run the program with the ``-h`` option. -
+ * ``[--output_dir]`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``[--archive_name]`` (optional): a deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. + + * ``[--user_data]`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means copying the user data to the target system need to be performed separately. -## Deploying Package on Target Systems + Once the script has successfully completed, the deployment package is generated in the output directory specified. -Once the Deployment Manager has successfully completed, the `.tar.gz` (on Linux or macOS) or `.zip` (on Windows) package is generated in the specified output directory. + +Deploying Package on Target Systems +################################### + +Once the Deployment Manager has successfully completed, the ``.tar.gz`` (on Linux or macOS) or ``.zip`` (on Windows) package is generated in the specified output directory. To deploy the OpenVINO Runtime components from the development machine to the target system, perform the following steps: 1. Copy the generated archive to the target system by using your preferred method. -2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace `openvino_deployment_package` with your specified name. -@sphinxdirective - -.. tab:: Linux +2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace ``openvino_deployment_package`` with your specified name. - .. code-block:: sh + .. tab:: Linux - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -.. tab:: Windows + tar xf openvino_deployment_package.tar.gz -C - .. code-block:: bat + .. tab:: Windows - Use the archiver of your choice to unzip the file. + .. code-block:: bat -.. tab:: macOS + Use the archiver of your choice to unzip the file. - .. code-block:: sh + .. tab:: macOS - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -@endsphinxdirective + tar xf openvino_deployment_package.tar.gz -C Now, the package is extracted to the destination directory. The following files and subdirectories are created: - - * `setupvars.sh` — a copy of `setupvars.sh`. - * `runtime` — contains the OpenVINO runtime binary files. - * `install_dependencies` — a snapshot of the `install_dependencies` directory from the OpenVINO installation directory. - * `` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. - -3. On a target Linux system, to run inference install additional dependencies by running the `install_openvino_dependencies.sh` script: - ```sh - cd /openvino/install_dependencies - sudo -E ./install_openvino_dependencies.sh - ``` + + * ``setupvars.sh`` — a copy of ``setupvars.sh``. + * ``runtime`` — contains the OpenVINO runtime binary files. + * ``install_dependencies`` — a snapshot of the ``install_dependencies`` directory from the OpenVINO installation directory. + * ```` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. + +3. 
On a target Linux system, to run inference, install additional dependencies by running the ``install_openvino_dependencies.sh`` script: + + .. code-block:: sh + + cd /openvino/install_dependencies + sudo -E ./install_openvino_dependencies.sh + 4. Set up the environment variables: -@sphinxdirective .. tab:: Linux @@ -219,7 +201,7 @@ To deploy the OpenVINO Runtime components from the development machine to the ta cd /openvino/ source ./setupvars.sh -@endsphinxdirective - Now, you have finished the deployment of the OpenVINO Runtime components to the target system. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/deployment/deployment_intro.md b/docs/OV_Runtime_UG/deployment/deployment_intro.md index df629a51e97574..fc9f4581c3792d 100644 --- a/docs/OV_Runtime_UG/deployment/deployment_intro.md +++ b/docs/OV_Runtime_UG/deployment/deployment_intro.md @@ -11,47 +11,69 @@ Deploy Application with Deployment Manager Local Distribution Libraries -@endsphinxdirective -> **NOTE**: Note that [running inference in OpenVINO Runtime](../openvino_intro.md) is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and [develop your application properly](../integrate_with_your_application.md) +.. note:: + Note that :doc:`running inference in OpenVINO Runtime ` is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and :doc:`develop your application properly `. -## Local Deployment Options +Local Deployment Options +######################## - Set a dependency on the existing prebuilt packages, also called "centralized distribution": - - using Debian / RPM packages - a recommended way for Linux operating systems; - - using PIP package manager on PyPI - the default approach for Python-based applications; - - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to [Installing OpenVINO on Linux from Docker](../../install_guides/installing-openvino-docker-linux.md) and [Installing OpenVINO on Windows from Docker](../../install_guides/installing-openvino-docker-windows.md). -Furthermore, to customize your OpenVINO Docker image, use the [Docker CI Framework](https://github.com/openvinotoolkit/docker_ci) to generate a Dockerfile and built the image. + + - using Debian / RPM packages - a recommended way for Linux operating systems; + - using PIP package manager on PyPI - the default approach for Python-based applications; + - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker ` and :doc:`Installing OpenVINO on Windows from Docker `. + +Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework ` to generate a Dockerfile and build the image. 
+ - Grab a necessary functionality of OpenVINO together with your application, also called "local distribution": - - using [OpenVINO Deployment Manager](deployment-manager-tool.md) - providing a convenient way for creating a distribution package; - - using the advanced [local distribution](local-distribution.md) approach; - - using [a static version of OpenVINO Runtime linked to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md). + + - using :doc:`OpenVINO Deployment Manager ` - providing a convenient way for creating a distribution package; + - using the advanced :doc:`local distribution ` approach; + - using `a static version of OpenVINO Runtime linked to the final app `__. The table below shows which distribution type can be used for what target operating system: -| Distribution type | Operating systems | -|------- ---------- | ----------------- | -| Debian packages | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit | -| RMP packages | Red Hat Enterprise Linux 8, 64-bit | -| Docker images | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit | -| PyPI (PIP package manager) | See [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/) | -| [OpenVINO Deployment Manager](deployment-manager-tool.md) | All operating systems | -| [Local distribution](local-distribution.md) | All operating systems | -| [Build OpenVINO statically and link to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) | All operating systems | +.. list-table:: + :header-rows: 1 + + * - Distribution type + - Operating systems + * - Debian packages + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit + * - RMP packages + - Red Hat Enterprise Linux 8, 64-bit + * - Docker images + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit + * - PyPI (PIP package manager) + - See https://pypi.org/project/openvino + * - :doc:`OpenVINO Deployment Manager ` + - All operating systems + * - :doc:`Libraries for Local Distribution ` + - All operating systems + * - `Build OpenVINO statically and link to the final app `__ + - All operating systems -## Granularity of Major Distribution Types -The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a [single 'openvino' package](https://pypi.org/project/openvino/) that contains all the runtime libraries and plugins, while a [local distribution](local-distribution.md) is a more configurable type providing higher granularity. Below are important details of the set of libraries included in the OpenVINO Runtime package: +Granularity of Major Distribution Types +####################################### -![](../../img/deployment_simplified.svg) +The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a `single 'openvino' package `__ that contains all the runtime libraries and plugins, while a :doc:`local distribution ` is a more configurable type providing higher granularity. 
Below are important details of the set of libraries included in the OpenVINO Runtime package: -- The main library `openvino` is used by users' C++ applications to link against with. The library provides all OpenVINO Runtime public APIs, including both API 2.0 and the previous Inference Engine and nGraph APIs. For C language applications, `openvino_c` is additionally required for distribution. -- The "optional" plugin libraries like `openvino_intel_cpu_plugin` (matching the `openvino_.+_plugin` pattern) are used to provide inference capabilities on specific devices or additional capabilities like [Hetero Execution](../hetero_execution.md) and [Multi-Device Execution](../multi_device.md). -- The "optional" plugin libraries like `openvino_ir_frontend` (matching `openvino_.+_frontend`) are used to provide capabilities to read models of different file formats such as OpenVINO IR, TensorFlow, ONNX, and PaddlePaddle. +.. image:: _static/images/deployment_simplified.svg + + +- The main library ``openvino`` is used by users' C++ applications to link against with. The library provides all OpenVINO Runtime public APIs, including both API 2.0 and the previous Inference Engine and nGraph APIs. For C language applications, ``openvino_c`` is additionally required for distribution. +- The "optional" plugin libraries like ``openvino_intel_cpu_plugin`` (matching the ``openvino_.+_plugin`` pattern) are used to provide inference capabilities on specific devices or additional capabilities like :doc:`Hetero Execution ` and :doc:`Multi-Device Execution `. +- The "optional" plugin libraries like ``openvino_ir_frontend`` (matching ``openvino_.+_frontend``) are used to provide capabilities to read models of different file formats such as OpenVINO IR, TensorFlow, ONNX, and PaddlePaddle. Here the term "optional" means that if the application does not use the capability enabled by the plugin, the plugin library or a package with the plugin is not needed in the final distribution. -Building a local distribution will require more detailed information, and you will find it in the dedicated [Libraries for Local Distribution](local-distribution.md) article. +Building a local distribution will require more detailed information, and you will find it in the dedicated :doc:`Libraries for Local Distribution ` article. + +.. note:: + + Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: :doc:`Configurations for GPU `, :doc:`Configurations for GNA `. -> **NOTE**: Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: [Configurations for GPU](../../install_guides/configurations-for-intel-gpu.md), [Configurations for GNA](../../install_guides/configurations-for-intel-gna.md). +@endsphinxdirective \ No newline at end of file diff --git a/docs/OV_Runtime_UG/deployment/local-distribution.md b/docs/OV_Runtime_UG/deployment/local-distribution.md index 1b4e02143ad1dc..cd68ac4bdbf085 100644 --- a/docs/OV_Runtime_UG/deployment/local-distribution.md +++ b/docs/OV_Runtime_UG/deployment/local-distribution.md @@ -1,155 +1,163 @@ # Libraries for Local Distribution {#openvino_docs_deploy_local_distribution} +@sphinxdirective + With a local distribution, each C or C++ application/installer will have its own copies of OpenVINO Runtime binaries. However, OpenVINO has a scalable plugin-based architecture, which means that some components can be loaded in runtime only when they are really needed. 
Therefore, it is important to understand which minimal set of libraries is really needed to deploy the application. This guide helps you to achieve that goal. +Local distribution is also appropriate for OpenVINO binaries built from sources using the `Build instructions `__, +but the guide below assumes that OpenVINO Runtime is built dynamically. In case of a `Static OpenVINO Runtime `__, select the required OpenVINO capabilities at the CMake configuration stage using `CMake Options for Custom Compilation `__, then build and link the OpenVINO components into the final application. -Local dsitribution is also appropriate for OpenVINO binaries built from sources using [Build instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build), but the guide below supposes OpenVINO Runtime is built dynamically. For case of [Static OpenVINO Runtime](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) select the required OpenVINO capabilities on CMake configuration stage using [CMake Options for Custom Compilation](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/cmake_options_for_custom_comiplation.md), the build and link the OpenVINO components into the final application. +.. note:: -> **NOTE**: The steps below are operating system independent and refer to a library file name without any prefixes (like `lib` on Unix systems) or suffixes (like `.dll` on Windows OS). Do not put `.lib` files on Windows OS to the distribution, because such files are needed only on a linker stage. + The steps below are operating system independent and refer to a library file name without any prefixes (like ``lib`` on Unix systems) or suffixes (like ``.dll`` on Windows OS). Do not put ``.lib`` files on Windows OS into the distribution, because such files are needed only at the linker stage. -## Library Requirements for C++ and C Languages -Independent on the language used to write the application, the `openvino` library must always be put to the final distribution, since it's a core library which orchestrates with all the inference and frontend plugins. In Intel® Distribution of OpenVINO™ toolkit, `openvino` depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate the devices with computations, so it must be put to the distribution package. +Library Requirements for C++ and C Languages +############################################ -If your application is written with C language, you need to put the `openvino_c` library additionally. +Regardless of the language used to write the application, the ``openvino`` library must always be put into the final distribution, since it is the core library that orchestrates all the inference and frontend plugins. In the Intel® Distribution of OpenVINO™ toolkit, ``openvino`` depends on the TBB libraries, which are used by OpenVINO Runtime to optimally saturate the devices with computations, so they must also be put into the distribution package. -The `plugins.xml` file with information about inference devices must also be taken as a support file for `openvino`. +If your application is written in the C language, you additionally need to include the ``openvino_c`` library. +The ``plugins.xml`` file with information about inference devices must also be included as a support file for ``openvino``. 
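For illustration, the sketch below shows the smallest application footprint this implies: only the core ``openvino`` library is linked, ``plugins.xml`` is read by ``ov::Core`` to discover the registered device plugins, and the plugin libraries themselves are loaded only on demand. This is a hedged example written for this guide, not an official sample; it assumes nothing beyond the public ``openvino/openvino.hpp`` header.

.. code-block:: cpp

   #include <iostream>
   #include <string>

   #include <openvino/openvino.hpp>

   int main() {
       // Creating ov::Core requires only the core `openvino` library;
       // it locates plugins.xml to learn which device plugins exist.
       ov::Core core;

       // Device plugins (e.g. openvino_intel_cpu_plugin) are loaded lazily,
       // only when the corresponding device is actually queried or used.
       for (const std::string& device : core.get_available_devices()) {
           std::cout << device << std::endl;
       }
       return 0;
   }
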
-## Libraries for Pluggable Components + +Libraries for Pluggable Components +################################## The picture below presents dependencies between the OpenVINO Runtime core and pluggable libraries: -![](../../img/deployment_full.svg) +.. image:: _static/images/deployment_full.svg -### Libraries for Compute Devices +Libraries for Compute Devices ++++++++++++++++++++++++++++++ For each inference device, OpenVINO Runtime has its own plugin library: -- `openvino_intel_cpu_plugin` for [Intel® CPU devices](../supported_plugins/CPU.md). -- `openvino_intel_gpu_plugin` for [Intel® GPU devices](../supported_plugins/GPU.md). -- `openvino_intel_gna_plugin` for [Intel® GNA devices](../supported_plugins/GNA.md). -- `openvino_arm_cpu_plugin` for [ARM CPU devices](../supported_plugins/ARM_CPU.md). + +- ``openvino_intel_cpu_plugin`` for :doc:`Intel® CPU devices `. +- ``openvino_intel_gpu_plugin`` for :doc:`Intel® GPU devices `. +- ``openvino_intel_gna_plugin`` for :doc:`Intel® GNA devices `. +- ``openvino_arm_cpu_plugin`` for :doc:`ARM CPU devices `. Depending on what devices are used in the app, the appropriate libraries need to be put to the distribution package. As it is shown on the picture above, some plugin libraries may have OS-specific dependencies which are either backend libraries or additional supports files with firmware, etc. Refer to the table below for details: -@sphinxdirective +.. dropdown:: Windows OS: -.. raw:: html + .. list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``OpenCL.dll``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|------------| -| CPU | `-` | -| GPU | `OpenCL.dll`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +.. dropdown:: Linux OS: -@sphinxdirective + .. list-table:: + :header-rows: 1 -.. raw:: html + * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``libOpenCL.so``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -
-@endsphinxdirective -@sphinxdirective +.. dropdown:: MacOS: -.. raw:: html + .. list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| GPU | `libOpenCL.so`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +Libraries for Execution Modes ++++++++++++++++++++++++++++++ -@sphinxdirective +The ``HETERO``, ``MULTI``, ``BATCH`` and ``AUTO`` execution modes can also be used explicitly or implicitly by the application. Use the following recommendation scheme to decide whether to put the appropriate libraries to the distribution package: -.. raw:: html +- If :doc:`AUTO ` is used explicitly in the application or `ov::Core::compile_model `__ is used without specifying a device, put ``openvino_auto_plugin`` to the distribution. -
+ .. note:: -@endsphinxdirective -@sphinxdirective - -.. raw:: html + Automatic Device Selection relies on :doc:`[inference device plugins `. If you are not sure about what inference devices are available on target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities `__ is used for `AUTO` to specify a limited device list, grab the corresponding device plugins only. -
+- If :doc:`MULTI ` is used explicitly, put ``openvino_auto_plugin`` to the distribution. +- If :doc:`HETERO ` is either used explicitly or `ov::hint::performance_mode `__ is used with GPU, put ``openvino_hetero_plugin`` to the distribution. +- If :doc:`BATCH ` is either used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_batch_plugin`` to the distribution. -@endsphinxdirective +Frontend Libraries for Reading Models ++++++++++++++++++++++++++++++++++++++ -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| Arm® CPU | `-` | - -@sphinxdirective - -.. raw:: html - -
- -@endsphinxdirective - -### Libraries for Execution Modes - -The `HETERO`, `MULTI`, `BATCH` and `AUTO` execution modes can also be used explicitly or implicitly by the application. Use the following recommendation scheme to decide whether to put the appropriate libraries to the distribution package: -- If [AUTO](../auto_device_selection.md) is used explicitly in the application or `ov::Core::compile_model` is used without specifying a device, put `openvino_auto_plugin` to the distribution. - > **NOTE**: Automatic Device Selection relies on [inference device plugins](../supported_plugins/Device_Plugins.md). If you are not sure about what inference devices are available on target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities` is used for `AUTO` to specify a limited device list, grab the corresponding device plugins only. +OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- If [MULTI](../multi_device.md) is used explicitly, put `openvino_auto_plugin` to the distribution. -- If [HETERO](../hetero_execution.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_hetero_plugin` to the distribution. -- If [BATCH](../automatic_batching.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_batch_plugin` to the distribution. +- ``openvino_ir_frontend`` is used to read OpenVINO IR. +- ``openvino_tensorflow_frontend`` is used to read TensorFlow file format. +- ``openvino_onnx_frontend`` is used to read ONNX file format. +- ``openvino_paddle_frontend`` is used to read Paddle file format. -### Frontend Libraries for Reading Models +Depending on the model format types that are used in the application in `ov::Core::read_model `__, pick up the appropriate libraries. -OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- `openvino_ir_frontend` is used to read OpenVINO IR. -- `openvino_tensorflow_frontend` is used to read TensorFlow file format. -- `openvino_onnx_frontend` is used to read ONNX file format. -- `openvino_paddle_frontend` is used to read Paddle file format. +.. note:: -Depending on the model format types that are used in the application in `ov::Core::read_model`, pick up the appropriate libraries. + To optimize the size of final distribution package, you are recommended to convert models to OpenVINO IR by using :doc:`Model Optimizer `. This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. -> **NOTE**: To optimize the size of final distribution package, you are recommended to convert models to OpenVINO IR by using [Model Optimizer](../../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. +(Legacy) Preprocessing via G-API +++++++++++++++++++++++++++++++++ -### (Legacy) Preprocessing via G-API +.. note:: -> **NOTE**: [G-API](../../gapi/gapi_intro.md) preprocessing is a legacy functionality, use [preprocessing capabilities from OpenVINO 2.0](../preprocessing_overview.md) which do not require any additional libraries. + :doc:`G-API ` preprocessing is a legacy functionality, use :doc:`preprocessing capabilities from OpenVINO 2.0 ` which do not require any additional libraries. 
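For reference, the OpenVINO 2.0 preprocessing mentioned in the note above is built into the core ``openvino`` library, so it adds nothing to the distribution. A rough sketch is shown below; the ``model.xml`` path, the single-input assumption, and the chosen layouts and formats are placeholders for illustration only, not values prescribed by this guide.

.. code-block:: cpp

   #include <openvino/core/preprocess/pre_post_process.hpp>
   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;
       // "model.xml" is a placeholder path; the model is assumed to have one input.
       auto model = core.read_model("model.xml");

       // Describe the real input data and the conversions to apply;
       // no extra preprocessing library is required for this.
       ov::preprocess::PrePostProcessor ppp(model);
       ppp.input().tensor()
           .set_element_type(ov::element::u8)
           .set_layout("NHWC")
           .set_color_format(ov::preprocess::ColorFormat::BGR);
       ppp.input().preprocess()
           .convert_color(ov::preprocess::ColorFormat::RGB)
           .convert_element_type(ov::element::f32);
       ppp.input().model().set_layout("NCHW");
       model = ppp.build();

       auto compiled_model = core.compile_model(model, "CPU");
       return 0;
   }
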
-If the application uses `InferenceEngine::PreProcessInfo::setColorFormat` or `InferenceEngine::PreProcessInfo::setResizeAlgorithm` methods, OpenVINO Runtime dynamically loads `openvino_gapi_preproc` plugin to perform preprocessing via G-API. +If the application uses `InferenceEngine::PreProcessInfo::setColorFormat `__ or `InferenceEngine::PreProcessInfo::setResizeAlgorithm `__ methods, OpenVINO Runtime dynamically loads `openvino_gapi_preproc` plugin to perform preprocessing via G-API. -## Examples +Examples +#################### **CPU + OpenVINO IR in C application** In this example, the application is written in C language, performs inference on CPU, and reads models stored as the OpenVINO IR format. The following libraries are used: -- The `openvino_c` library is a main dependency of the application. It links against this library. -- The `openvino` library is used as a private dependency for `openvino_c` and is also used in the deployment. -- `openvino_intel_cpu_plugin` is used for inference. -- `openvino_ir_frontend` is used to read source models. +- The ``openvino_c`` library is a main dependency of the application. It links against this library. +- The ``openvino`` library is used as a private dependency for ``openvino_c`` and is also used in the deployment. +- ``openvino_intel_cpu_plugin`` is used for inference. +- ``openvino_ir_frontend`` is used to read source models. **MULTI execution on GPU and CPU in `tput` mode** -In this example, the application is written in C++, performs inference [simultaneously on GPU and CPU devices](../multi_device.md) with the `ov::hint::PerformanceMode::THROUGHPUT` property set, and reads models stored in the ONNX format. The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. -- `openvino_auto_plugin` is used for Multi-Device Execution. -- `openvino_auto_batch_plugin` can be also put to the distribution to improve the saturation of [Intel® GPU](../supported_plugins/GPU.md) device. If there is no such plugin, [Automatic Batching](../automatic_batching.md) is turned off. -- `openvino_onnx_frontend` is used to read source models. +In this example, the application is written in C++, performs inference :doc:`simultaneously on GPU and CPU devices ` with the `ov::hint::PerformanceMode::THROUGHPUT `__ property set, and reads models stored in the ONNX format. The following libraries are used: + +- The ``openvino`` library is a main dependency of the application. It links against this library. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. +- ``openvino_auto_plugin`` is used for Multi-Device Execution. +- ``openvino_auto_batch_plugin`` can be also put to the distribution to improve the saturation of :doc:`Intel® GPU ` device. If there is no such plugin, :doc:`Automatic Batching ` is turned off. +- ``openvino_onnx_frontend`` is used to read source models. **Auto-Device Selection between GPU and CPU** -In this example, the application is written in C++, performs inference with the [Automatic Device Selection](../auto_device_selection.md) mode, limiting device list to GPU and CPU, and reads models [created using C++ code](../model_representation.md). The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -- `openvino_auto_plugin` is used to enable Automatic Device Selection. 
-- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine. -- No frontend library is needed because `ov::Model` is created in code. +In this example, the application is written in C++, performs inference with the :doc:`Automatic Device Selection ` mode, limiting device list to GPU and CPU, and reads models :doc:`created using C++ code `. The following libraries are used: + +- The ``openvino`` library is a main dependency of the application. It links against this library. +- ``openvino_auto_plugin`` is used to enable Automatic Device Selection. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine. +- No frontend library is needed because ``ov::Model`` is created in code. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/img/BASIC_FLOW_IE_C.svg b/docs/OV_Runtime_UG/img/BASIC_FLOW_IE_C.svg deleted file mode 100644 index 6b8ad0ef282518..00000000000000 --- a/docs/OV_Runtime_UG/img/BASIC_FLOW_IE_C.svg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccc7704d2a27f7491729767443f3d2bdd0ccc930f16fde631a7f9c67d158297a -size 71369 diff --git a/docs/OV_Runtime_UG/lowlatency2.md b/docs/OV_Runtime_UG/lowlatency2.md index 87b544ffb235cf..6e14d0ea8aee2e 100644 --- a/docs/OV_Runtime_UG/lowlatency2.md +++ b/docs/OV_Runtime_UG/lowlatency2.md @@ -90,7 +90,7 @@ Steps to Apply LowLatency2 } -4. Use state API. See the :ref:`OpenVINO state API ` and the `Example of stateful network inference ` sections. +4. Use state API. See the :ref:`OpenVINO state API ` and the `Example of stateful network inference ` sections. Known Limitations #################### diff --git a/docs/OV_Runtime_UG/lowlatency_deprecated.md b/docs/OV_Runtime_UG/lowlatency_deprecated.md index 7afe243ed72404..a94ab2c0c469f1 100644 --- a/docs/OV_Runtime_UG/lowlatency_deprecated.md +++ b/docs/OV_Runtime_UG/lowlatency_deprecated.md @@ -72,7 +72,7 @@ Steps to Apply LowLatency } -4. Use state API. See the :ref:`OpenVINO state API ` and the :ref:`Example of stateful network inference ` sections. +4. Use state API. See the :ref:`OpenVINO state API ` and the :ref:`Example of stateful network inference ` sections. Known Limitations for the LowLatency #################################### diff --git a/docs/OV_Runtime_UG/network_state_intro.md b/docs/OV_Runtime_UG/model_state_intro.md similarity index 69% rename from docs/OV_Runtime_UG/network_state_intro.md rename to docs/OV_Runtime_UG/model_state_intro.md index 063eef814e8f56..9f5f525565e78e 100644 --- a/docs/OV_Runtime_UG/network_state_intro.md +++ b/docs/OV_Runtime_UG/model_state_intro.md @@ -1,4 +1,4 @@ -# Stateful models {#openvino_docs_OV_UG_network_state_intro} +# Stateful models {#openvino_docs_OV_UG_model_state_intro} @sphinxdirective @@ -11,22 +11,22 @@ Several use cases require processing of data sequences. When length of a sequence is known and small enough, -it can be processed with RNN like networks that contain a cycle inside. However, in some cases (e.g., online speech recognition of time series +it can be processed with RNN like models that contain a cycle inside. However, in some cases (e.g., online speech recognition of time series forecasting) length of data sequence is unknown. Then, data can be divided in small portions and processed step-by-step. 
The dependency -between data portions should be addressed. For that, networks save some data between inferences - a state. When one dependent sequence is over, +between data portions should be addressed. For that, models save some data between inferences - a state. When one dependent sequence is over, a state should be reset to initial value and a new sequence can be started. -Several frameworks have special APIs for states in networks. For example, Keras has ``stateful`` - a special option for RNNs, that turns on saving a state between inferences. Kaldi contains special ``Offset`` specifier to define time offset in a network. +Several frameworks have special APIs for states in model. For example, Keras has ``stateful`` - a special option for RNNs, that turns on saving a state between inferences. Kaldi contains special ``Offset`` specifier to define time offset in a model. -OpenVINO also contains a special API to simplify work with networks with states. A state is automatically saved between inferences, +OpenVINO also contains a special API to simplify work with models with states. A state is automatically saved between inferences, and there is a way to reset a state when needed. A state can also be read or set to some new value between inferences. OpenVINO State Representation ############################# -OpenVINO contains the ``Variable``, a special abstraction to represent a state in a network. There are two operations: :doc:`Assign ` - to save a value in a state and :doc:`ReadValue ` - to read a value saved on previous iteration. +OpenVINO contains the ``Variable``, a special abstraction to represent a state in a model. There are two operations: :doc:`Assign ` - to save a value in a state and :doc:`ReadValue ` - to read a value saved on previous iteration. -To get a model with states ready for inference, convert a model from another framework to OpenVINO IR with Model Optimizer or create an nGraph function. +To get a model with states ready for inference, convert a model from another framework to OpenVINO IR with Model Optimizer or create an OpenVINO model. (For more information, refer to the :doc:`Build OpenVINO Model section `). Below is the graph in both forms: @@ -47,7 +47,7 @@ The ``bin`` file for this graph should contain ``float 0`` in binary form. The c .. code-block:: xml - + @@ -154,65 +154,44 @@ The ``bin`` file for this graph should contain ``float 0`` in binary form. The c -Example of Creating Model nGraph API -++++++++++++++++++++++++++++++++++++ +Example of Creating Model OpenVINO API +++++++++++++++++++++++++++++++++++++++++ -In the following example, the ``SinkVector`` is used to create the `ngraph::Function `__. For a network with states, except inputs and outputs, the ``Assign`` nodes should also point to the ``Function`` to avoid deleting it during graph transformations. Use the constructor to do it, as shown in the example, or with the special ``add_sinks(const SinkVector& sinks)`` method. After deleting the node from the graph with the ``delete_sink()`` method, a sink can be deleted from ``ngraph::Function``. - -.. code-block:: cpp - - #include - #include - // ... - - auto arg = make_shared(element::f32, Shape{1, 1}); - auto init_const = ngraph::opset6::Constant::create(element::f32, Shape{1, 1}, {0}); - - // The ReadValue/Assign operations must be used in pairs in the network. - // For each such a pair, its own variable object must be created. 
- const std::string variable_name("variable0"); - auto variable = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name}); - - // Creating ngraph::function - auto read = make_shared(init_const, variable); - std::vector> args = {arg, read}; - auto add = make_shared(arg, read); - auto assign = make_shared(add, variable); - auto add2 = make_shared(add, read); - auto res = make_shared(add2); - - auto f = make_shared(ResultVector({res}), ParameterVector({arg}), SinkVector({assign})); +In the following example, the ``SinkVector`` is used to create the ``ov::Model``. For a model with states, except inputs and outputs, the ``Assign`` nodes should also point to the ``Model`` to avoid deleting it during graph transformations. Use the constructor to do it, as shown in the example, or with the special ``add_sinks(const SinkVector& sinks)`` method. After deleting the node from the graph with the ``delete_sink()`` method, a sink can be deleted from ``ov::Model``. +.. doxygensnippet:: docs/snippets/ov_model_with_state_infer.cpp + :language: cpp + :fragment: [model_create] .. _openvino-state-api: OpenVINO State API #################### -Inference Engine has the ``InferRequest::QueryState`` method to get the list of states from a network and ``IVariableState`` interface to operate with states. Below is a brief description of methods and the example of how to use this interface. +OpenVINO has the ``InferRequest::query_state`` method to get the list of states from a model and ``ov::IVariableState`` interface to operate with states. Below is a brief description of methods and the example of how to use this interface. -* ``std::string GetName() const`` - returns the name (variable_id) of a corresponding Variable. -* ``void Reset()`` - resets a state to a default value. -* ``void SetState(Blob::Ptr newState)`` - sets a new value for a state. -* ``Blob::CPtr GetState() const`` - returns current value of state. +* ``std::string get_name() const`` - returns the name (variable_id) of a corresponding Variable. +* ``void reset()`` - resets a state to a default value. +* ``void set_state(const ov::Tensor& state)`` - sets a new value for a state. +* ``const ov::Tensor& get_state() const`` - returns current value of state. -.. _example-of-stateful-network-inference: +.. _example-of-stateful-model-inference: -Example of Stateful Network Inference +Example of Stateful Model Inference ##################################### Based on the IR from the previous section, the example below demonstrates inference of two independent sequences of data. A state should be reset between these sequences. -One infer request and one thread will be used in this example. Using several threads is possible if there are several independent sequences. Then, each sequence can be processed in its own infer request. Inference of one sequence in several infer requests is not recommended. In one infer request, a state will be saved automatically between inferences, but if the first step is done in one infer request and the second in another, a state should be set in a new infer request manually (using the ``IVariableState::SetState`` method). +One infer request and one thread will be used in this example. Using several threads is possible if there are several independent sequences. Then, each sequence can be processed in its own infer request. Inference of one sequence in several infer requests is not recommended. 
In one infer request, a state will be saved automatically between inferences, but if the first step is done in one infer request and the second in another, a state should be set in a new infer request manually (using the ``ov::IVariableState::set_state`` method). -.. doxygensnippet:: docs/snippets/InferenceEngine_network_with_state_infer.cpp +.. doxygensnippet:: docs/snippets/ov_model_with_state_infer.cpp :language: cpp :fragment: [part1] -For more elaborate examples demonstrating how to work with networks with states, +For more elaborate examples demonstrating how to work with models with states, refer to the speech sample and a demo in the :doc:`Samples Overview `. LowLatency Transformations diff --git a/docs/OV_Runtime_UG/multi_device.md b/docs/OV_Runtime_UG/multi_device.md index 01d03ef100bd3f..e1b6492d59e5a2 100644 --- a/docs/OV_Runtime_UG/multi_device.md +++ b/docs/OV_Runtime_UG/multi_device.md @@ -7,13 +7,13 @@ To run inference on multiple devices, you can choose either of the following way - Use the :ref:`CUMULATIVE_THROUGHPUT option ` of the Automatic Device Selection mode. This way, you can use all available devices in the system without the need to specify them. - Use the Multi-Device execution mode. This page will explain how it works and how to use it. -@endsphinxdirective - -## How MULTI Works +How MULTI Works +#################### The Multi-Device execution mode, or MULTI for short, acts as a "virtual" or a "proxy" device, which does not bind to a specific type of hardware. Instead, it assigns available computing devices to particular inference requests, which are then executed in parallel. The potential gains from using Multi-Device execution are: + * improved throughput from using multiple devices at once, * increase in performance stability due to multiple devices sharing inference workload. @@ -22,31 +22,29 @@ Importantly, the Multi-Device mode does not change the application logic, so it Note that the performance increase in this mode comes from utilizing multiple devices at once. This means that you need to provide the devices with enough inference requests to keep them busy, otherwise you will not benefit much from using MULTI. -## Using the Multi-Device Mode +Using the Multi-Device Mode +########################### Following the OpenVINO™ naming convention, the Multi-Device mode is assigned the label of “MULTI.” The only configuration option available for it is a prioritized list of devices to use: -@sphinxdirective -+---------------------------+---------------------------------+------------------------------------------------------------+ -| Property | Property values | Description | -+===========================+=================================+============================================================+ -| | | MULTI: | | Specifies the devices available for selection. | -| | | comma-separated, no spaces | | The device sequence will be taken as priority | -+---------------------------+---------------------------------+ | from high to low. | -| ov::device::priorities | | device names | | Priorities can be set directly as a string. 
| -| | | comma-separated, no spaces | | -+---------------------------+---------------------------------+------------------------------------------------------------+ ++----------------------------+---------------------------------+------------------------------------------------------------+ +| Property | Property values | Description | ++============================+=================================+============================================================+ +| | | MULTI: | | Specifies the devices available for selection. | +| | | comma-separated, no spaces | | The device sequence will be taken as priority | ++----------------------------+---------------------------------+ | from high to low. | +| ``ov::device::priorities`` | | device names | | Priorities can be set directly as a string. | +| | | comma-separated, no spaces | | ++----------------------------+---------------------------------+------------------------------------------------------------+ -@endsphinxdirective Specifying the device list explicitly is required by MULTI, as it defines the devices available for inference and sets their priorities. Importantly, the list may also specify the number of requests for MULTI to keep for each device, as described below. -Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in [Working with devices](supported_plugins/Device_Plugins.md). +Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in :doc:`Working with devices `. The following commands are accepted by the API: -@sphinxdirective .. tab:: C++ @@ -60,11 +58,9 @@ The following commands are accepted by the API: :language: python :fragment: [MULTI_0] -@endsphinxdirective Notice that MULTI allows you to **change device priorities on the fly**. You can alter the order, exclude a device, and bring an excluded device back. Still, it does not allow adding new devices. -@sphinxdirective .. tab:: C++ @@ -78,19 +74,17 @@ Notice that MULTI allows you to **change device priorities on the fly**. You can :language: python :fragment: [MULTI_1] -@endsphinxdirective - +One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: ``"MULTI:CPU(2),GPU(2)"``. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in `Configuring Individual Devices and Creating MULTI On Top <#configuring-individual-devices-and-creating-the-multi-device-on-top>`__. -One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: `"MULTI:CPU(2),GPU(2)"`. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in [Configuring Individual Devices and Creating MULTI On Top](#config-multi-on-top). +To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration `. -To check what devices are present in the system, you can use the Device API. 
For information on how to do it, check [Query device properties and configuration](supported_plugins/config_properties.md). +Configuring Individual Devices and Creating the Multi-Device On Top ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Configuring Individual Devices and Creating the Multi-Device On Top As mentioned previously, executing inference with MULTI may be set up by configuring individual devices before creating the "MULTI" device on top. It may be considered for performance reasons. -@sphinxdirective .. tab:: C++ @@ -104,17 +98,15 @@ As mentioned previously, executing inference with MULTI may be set up by configu :language: python :fragment: [MULTI_4] -@endsphinxdirective Alternatively, you can combine all the individual device settings into a single config file and load it for MULTI to parse. See the code example in the next section. +Querying the Optimal Number of Inference Requests ++++++++++++++++++++++++++++++++++++++++++++++++++ - -### Querying the Optimal Number of Inference Requests When using MULTI, you don't need to sum over included devices yourself, you can query the optimal number of requests directly, -using the [configure devices](supported_plugins/config_properties.md) property: +using the :doc:`configure devices ` property: -@sphinxdirective .. tab:: C++ @@ -122,56 +114,52 @@ using the [configure devices](supported_plugins/config_properties.md) property: :language: cpp :fragment: [part5] -@endsphinxdirective - - -## Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +######################################################################### To see how the Multi-Device execution is used in practice and test its performance, take a look at OpenVINO's Benchmark Application which presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads. Here is an example command to evaluate performance of CPU + GPU: -```sh -./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 -``` +.. code-block:: sh + + ./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 + + +For more information, refer to the :doc:`C++ ` or :doc:`Python ` version instructions. -For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions. -@sphinxdirective .. note:: You can keep using the FP16 IR without converting it to FP32, even if some of the listed devices do not support it. The conversion will be done automatically for you. - No demos are yet fully optimized for MULTI, by means of supporting the ov::optimal_number_of_infer_requests property, using the GPU streams/throttling, and so on. -@endsphinxdirective + No demos are yet fully optimized for MULTI, by means of supporting the ``ov::optimal_number_of_infer_requests`` property, using the GPU streams/throttling, and so on. + +Performance Considerations for the Multi-Device Execution +######################################################### -## Performance Considerations for the Multi-Device Execution For best performance when using the MULTI execution mode you should consider a few recommendations: -- MULTI usually performs best when the fastest device is specified first in the device candidate list. -This is particularly important when the request-level parallelism is not sufficient -(e.g. the number of requests is not enough to saturate all devices). 
-- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests -directly from the instance of the `ov:compiled_model`. Refer to the code of the previously mentioned `benchmark_app` for more details. -- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the `benchmark_app` code for details. One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower -communication of inference completion from the device to the host. -- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, -and device-specific 'worker' requests that are being actually scheduled behind the scene. -To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. + +- MULTI usually performs best when the fastest device is specified first in the device candidate list. This is particularly important when the request-level parallelism is not sufficient (e.g. the number of requests is not enough to saturate all devices). +- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests directly from the instance of the ``ov:compiled_model``. Refer to the code of the previously mentioned ``benchmark_app`` for more details. +- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the ``benchmark_app`` code for details. One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower communication of inference completion from the device to the host. +- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, and device-specific 'worker' requests that are being actually scheduled behind the scene. To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. - While performance of accelerators combines well with MULTI, the CPU+GPU execution may introduce certain performance issues. It is due to the devices sharing some resources, like power or bandwidth. Enabling the GPU throttling hint, which saves a CPU thread for CPU inference, is an example of a recommended solution addressing this issue. +Additional Resources +#################### -## Additional Resources +- :doc:`Supported Devices ` +- :doc:`Automatic Device Selection ` -- [Supported Devices](supported_plugins/Supported_Devices.md) -- [Automatic Device Selection](./auto_device_selection.md) -@sphinxdirective .. raw:: html -@endsphinxdirective -> **NOTE**: This video is currently available only for C++, but many of the same concepts apply to Python. +.. note:: This video is currently available only for C++, but many of the same concepts apply to Python. 
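To tie the recommendations of this page together, the hedged sketch below compiles a model on MULTI with an explicit GPU-then-CPU priority list and queries the optimal number of inference requests instead of hard-coding it. ``model.xml`` is a placeholder path, and the device names assume that both devices are present on the machine.

.. code-block:: cpp

   #include <cstdint>
   #include <vector>

   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;
       // "model.xml" is a placeholder; use the path to your own model.
       auto model = core.read_model("model.xml");

       // Compile on MULTI with an explicit priority list: GPU first, then CPU.
       auto compiled_model = core.compile_model(model, "MULTI",
                                                ov::device::priorities("GPU", "CPU"));

       // Ask the compiled model how many requests saturate the selected devices.
       uint32_t nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests);

       std::vector<ov::InferRequest> requests;
       for (uint32_t i = 0; i < nireq; ++i) {
           requests.push_back(compiled_model.create_infer_request());
       }
       // These requests can now be started asynchronously and run in parallel.
       return 0;
   }
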
+ +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md index 5b92e98db19b48..62c394eb070ba4 100644 --- a/docs/OV_Runtime_UG/openvino_intro.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -13,23 +13,22 @@ openvino_docs_OV_UG_Working_with_devices openvino_docs_OV_UG_ShapeInference openvino_docs_OV_UG_DynamicShapes - openvino_docs_OV_UG_network_state_intro - -@endsphinxdirective + openvino_docs_OV_UG_model_state_intro + OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read an Intermediate Representation (IR), TensorFlow, ONNX, or PaddlePaddle model and execute it on preferred devices. OpenVINO Runtime uses a plugin architecture. Its plugins are software components that contain complete implementation for inference on a particular Intel® hardware device: CPU, GPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs for configuring devices or API interoperability between OpenVINO Runtime and underlying plugin backend. - -The scheme below illustrates the typical workflow for deploying a trained deep learning model: - -![](img/BASIC_FLOW_IE_C.svg) +The scheme below illustrates the typical workflow for deploying a trained deep learning model: -## Video +.. image:: _static/images/BASIC_FLOW_IE_C.svg + + +Video +#################### -@sphinxdirective .. list-table:: @@ -39,5 +38,5 @@ The scheme below illustrates the typical workflow for deploying a trained deep l src="https://www.youtube.com/embed/e6R13V8nbak"> * - **OpenVINO Runtime Concept**. Duration: 3:43 - + @endsphinxdirective diff --git a/docs/OV_Runtime_UG/performance_hints.md b/docs/OV_Runtime_UG/performance_hints.md index 5b825b7d43aa4a..c66bb552b473ac 100644 --- a/docs/OV_Runtime_UG/performance_hints.md +++ b/docs/OV_Runtime_UG/performance_hints.md @@ -1,47 +1,56 @@ # High-level Performance Hints {#openvino_docs_OV_UG_Performance_Hints} -Even though all [supported devices](supported_plugins/Device_Plugins.md) in OpenVINO™ offer low-level performance settings, utilizing them is not recommended outside of very few cases. -The preferred way to configure performance in OpenVINO Runtime is using performance hints. This is a future-proof solution fully compatible with the [automatic device selection inference mode](./auto_device_selection.md) and designed with *portability* in mind. +@sphinxdirective + +Even though all :doc:`supported devices ` in OpenVINO™ offer low-level performance settings, utilizing them is not recommended outside of very few cases. +The preferred way to configure performance in OpenVINO Runtime is using performance hints. This is a future-proof solution fully compatible with the :doc:`automatic device selection inference mode ` and designed with *portability* in mind. The hints also set the direction of the configuration in the right order. Instead of mapping the application needs to the low-level performance settings, and keeping an associated application logic to configure each possible device separately, the hints express a target scenario with a single config key and let the *device* configure itself in response. -Previously, a certain level of automatic configuration was the result of the *default* values of the parameters. 
For example, the number of CPU streams was deduced from the number of CPU cores, when `ov::streams::AUTO` (`CPU_THROUGHPUT_AUTO` in the pre-API 2.0 terminology) was set. However, the resulting number of streams did not account for actual compute requirements of the model to be inferred. +Previously, a certain level of automatic configuration was the result of the *default* values of the parameters. For example, the number of CPU streams was deduced from the number of CPU cores, when `ov::streams::AUTO `__ (``CPU_THROUGHPUT_AUTO`` in the pre-API 2.0 terminology) was set. However, the resulting number of streams did not account for actual compute requirements of the model to be inferred. The hints, in contrast, respect the actual model, so the parameters for optimal throughput are calculated for each model individually (based on its compute versus memory bandwidth requirements and capabilities of the device). -## Performance Hints: Latency and Throughput -As discussed in the [Optimization Guide](../optimization_guide/dldt_deployment_optimization_guide.md) there are a few different metrics associated with inference speed. +Performance Hints: Latency and Throughput +######################################### + +As discussed in the :doc:`Optimization Guide ` there are a few different metrics associated with inference speed. Throughput and latency are some of the most widely used metrics that measure the overall performance of an application. -Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely `ov::hint::PerformanceMode::THROUGHPUT` and `ov::hint::PerformanceMode::LATENCY`. -A special `ov::hint::PerformanceMode::UNDEFINED` hint acts the same as specifying no hint. +Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely `ov::hint::PerformanceMode::THROUGHPUT `__ and `ov::hint::PerformanceMode::LATENCY `__. +A special `ov::hint::PerformanceMode::UNDEFINED `__ hint acts the same as specifying no hint. -For more information on conducting performance measurements with the `benchmark_app`, refer to the last section in this document. +For more information on conducting performance measurements with the ``benchmark_app``, refer to the last section in this document. -Keep in mind that a typical model may take significantly more time to load with the `ov::hint::PerformanceMode::THROUGHPUT` and consume much more memory, compared to the `ov::hint::PerformanceMode::LATENCY`. +Keep in mind that a typical model may take significantly more time to load with the ``ov::hint::PerformanceMode::THROUGHPUT`` and consume much more memory, compared to the ``ov::hint::PerformanceMode::LATENCY``. + +Performance Hints: How It Works +############################### -## Performance Hints: How It Works Internally, every device "translates" the value of the hint to the actual performance settings. -For example, the `ov::hint::PerformanceMode::THROUGHPUT` selects the number of CPU or GPU streams. -Additionally, the optimal batch size is selected for the GPU and the [automatic batching](../OV_Runtime_UG/automatic_batching.md) is applied whenever possible. To check whether the device supports it, refer to the [devices/features support matrix](./supported_plugins/Device_Plugins.md) article. - -The resulting (device-specific) settings can be queried back from the instance of the `ov:Compiled_Model`. -Be aware that the `benchmark_app` outputs the actual settings for the `THROUGHPUT` hint. 
See the example of the output below: - - ``` - $benchmark_app -hint tput -d CPU -m 'path to your favorite model' - ... - [Step 8/11] Setting optimal runtime parameters - [ INFO ] Device: CPU - [ INFO ] { PERFORMANCE_HINT , THROUGHPUT } - ... - [ INFO ] { OPTIMAL_NUMBER_OF_INFER_REQUESTS , 4 } - [ INFO ] { NUM_STREAMS , 4 } - ... - ``` - -## Using the Performance Hints: Basic API -In the example code snippet below, `ov::hint::PerformanceMode::THROUGHPUT` is specified for the `ov::hint::performance_mode` property for `compile_model`: -@sphinxdirective +For example, the ``ov::hint::PerformanceMode::THROUGHPUT`` selects the number of CPU or GPU streams. +Additionally, the optimal batch size is selected for the GPU and the :doc:`automatic batching ` is applied whenever possible. To check whether the device supports it, refer to the :doc:`devices/features support matrix ` article. + +The resulting (device-specific) settings can be queried back from the instance of the ``ov:Compiled_Model``. +Be aware that the ``benchmark_app`` outputs the actual settings for the ``THROUGHPUT`` hint. See the example of the output below: + + .. code-block:: sh + + $benchmark_app -hint tput -d CPU -m 'path to your favorite model' + ... + [Step 8/11] Setting optimal runtime parameters + [ INFO ] Device: CPU + [ INFO ] { PERFORMANCE_HINT , THROUGHPUT } + ... + [ INFO ] { OPTIMAL_NUMBER_OF_INFER_REQUESTS , 4 } + [ INFO ] { NUM_STREAMS , 4 } + ... + + +Using the Performance Hints: Basic API +###################################### + +In the example code snippet below, ``ov::hint::PerformanceMode::THROUGHPUT`` is specified for the ``ov::hint::performance_mode`` property for ``compile_model``: + .. tab:: C++ @@ -55,12 +64,13 @@ In the example code snippet below, `ov::hint::PerformanceMode::THROUGHPUT` is sp :language: python :fragment: [compile_model] -@endsphinxdirective -## Additional (Optional) Hints from the App -For an application that processes 4 video streams, the most future-proof way to communicate the limitation of the parallel slack is to equip the performance hint with the optional `ov::hint::num_requests` configuration key set to 4. -As mentioned earlier, this will limit the batch size for the GPU and the number of inference streams for the CPU. Thus, each device uses the `ov::hint::num_requests` while converting the hint to the actual device configuration options: -@sphinxdirective +Additional (Optional) Hints from the App +######################################## + +For an application that processes 4 video streams, the most future-proof way to communicate the limitation of the parallel slack is to equip the performance hint with the optional ``ov::hint::num_requests`` configuration key set to 4. +As mentioned earlier, this will limit the batch size for the GPU and the number of inference streams for the CPU. Thus, each device uses the ``ov::hint::num_requests`` while converting the hint to the actual device configuration options: + .. 
tab:: C++ @@ -74,11 +84,12 @@ As mentioned earlier, this will limit the batch size for the GPU and the number :language: python :fragment: [hint_num_requests] -@endsphinxdirective -## Optimal Number of Inference Requests -The hints are used on the presumption that the application queries `ov::optimal_number_of_infer_requests` to create and run the returned number of requests simultaneously: -@sphinxdirective +Optimal Number of Inference Requests +#################################### + +The hints are used on the presumption that the application queries ``ov::optimal_number_of_infer_requests`` to create and run the returned number of requests simultaneously: + .. tab:: C++ @@ -92,21 +103,24 @@ The hints are used on the presumption that the application queries `ov::optimal_ :language: python :fragment: [query_optimal_num_requests] -@endsphinxdirective -While an application is free to create more requests if needed (for example to support asynchronous inputs population) **it is very important to at least run the `ov::optimal_number_of_infer_requests` of the inference requests in parallel**. It is recommended for efficiency, or device utilization, reasons. +While an application is free to create more requests if needed (for example to support asynchronous inputs population) **it is very important to at least run the ``ov::optimal_number_of_infer_requests`` of the inference requests in parallel**. It is recommended for efficiency, or device utilization, reasons. + +Keep in mind that ``ov::hint::PerformanceMode::LATENCY`` does not necessarily imply using single inference request. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as the number of NUMA nodes in the system. +To make your application fully scalable, make sure to query the ``ov::optimal_number_of_infer_requests`` directly. -Keep in mind that `ov::hint::PerformanceMode::LATENCY` does not necessarily imply using single inference request. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as the number of NUMA nodes in the system. -To make your application fully scalable, make sure to query the `ov::optimal_number_of_infer_requests` directly. +Prefer Async API +################ + +The API of the inference requests offers Sync and Async execution. The ``ov::InferRequest::infer()`` is inherently synchronous and simple to operate (as it serializes the execution flow in the current application thread). The Async "splits" the ``infer()`` into ``ov::InferRequest::start_async()`` and ``ov::InferRequest::wait()`` (or callbacks). For more information, refer to the doc:`API examples `. +Although the Synchronous API can be somewhat easier to start with, it is recommended to use the Asynchronous (callbacks-based) API in the production code. It is the most general and scalable way to implement the flow control for any possible number of requests (and thus both latency and throughput scenarios). + +Combining the Hints and Individual Low-Level Settings +##################################################### -## Prefer Async API -The API of the inference requests offers Sync and Async execution. The `ov::InferRequest::infer()` is inherently synchronous and simple to operate (as it serializes the execution flow in the current application thread). The Async "splits" the `infer()` into `ov::InferRequest::start_async()` and `ov::InferRequest::wait()` (or callbacks). For more information, refer to the [API examples](../OV_Runtime_UG/ov_infer_request.md). 
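As a rough illustration of the Async flow described above (a sketch under the assumption of the standard OpenVINO 2.0 C++ API, not the documented snippet; tensor population and result handling are intentionally omitted, and ``model.xml`` is a placeholder), the pattern could look like this:

```cpp
// Sketch of the callbacks-based Async flow: create the optimal number of
// requests, submit them with start_async(), and wait for completion.
// Input/output handling is intentionally omitted.
#include <openvino/openvino.hpp>
#include <exception>
#include <vector>

int main() {
    ov::Core core;
    auto compiled_model = core.compile_model(
        "model.xml", "CPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));

    const auto n = compiled_model.get_property(ov::optimal_number_of_infer_requests);
    std::vector<ov::InferRequest> requests;
    for (uint32_t i = 0; i < n; ++i)
        requests.push_back(compiled_model.create_infer_request());

    for (auto& request : requests) {
        request.set_callback([](std::exception_ptr /*error*/) {
            // Consume the results (and typically re-submit the request) here.
        });
        request.start_async();  // non-blocking submission
    }
    for (auto& request : requests)
        request.wait();         // block until each request completes
    return 0;
}
```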
- Although the Synchronous API can be somewhat easier to start with, it is recommended to use the Asynchronous (callbacks-based) API in the production code. It is the most general and scalable way to implement the flow control for any possible number of requests (and thus both latency and throughput scenarios). - -## Combining the Hints and Individual Low-Level Settings While sacrificing the portability to some extent, it is possible to combine the hints with individual device-specific settings. -For example, use `ov::hint::PerformanceMode::THROUGHPUT` to prepare a general configuration and override any of its specific values: -@sphinxdirective +For example, use ``ov::hint::PerformanceMode::THROUGHPUT`` to prepare a general configuration and override any of its specific values: + .. tab:: C++ @@ -121,15 +135,22 @@ For example, use `ov::hint::PerformanceMode::THROUGHPUT` to prepare a general co :fragment: [hint_plus_low_level] -@endsphinxdirective +Testing Performance of the Hints with the Benchmark_App +####################################################### + +The ``benchmark_app``, that exists in both :doc:`C++ ` and :doc:`Python ` versions, is the best way to evaluate the functionality of the performance hints for a particular device: + +* benchmark_app **-hint tput** -d 'device' -m 'path to your model' +* benchmark_app **-hint latency** -d 'device' -m 'path to your model' + +Disabling the hints to emulate the pre-hints era (highly recommended before trying the individual low-level settings, such as the number of streams as below, threads, etc): -## Testing Performance of the Hints with the Benchmark_App -The `benchmark_app`, that exists in both [C++](../../samples/cpp/benchmark_app/README.md) and [Python](../../tools/benchmark_tool/README.md) versions, is the best way to evaluate the functionality of the performance hints for a particular device: - - benchmark_app **-hint tput** -d 'device' -m 'path to your model' - - benchmark_app **-hint latency** -d 'device' -m 'path to your model' -- Disabling the hints to emulate the pre-hints era (highly recommended before trying the individual low-level settings, such as the number of streams as below, threads, etc): -- - benchmark_app **-hint none -nstreams 1** -d 'device' -m 'path to your model' - +* benchmark_app **-hint none -nstreams 1** -d 'device' -m 'path to your model' -### Additional Resources -* [Supported Devices](./supported_plugins/Supported_Devices.md) + +Additional Resources +#################### + +* :doc:`Supported Devices ` + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/supported_plugins/CPU.md b/docs/OV_Runtime_UG/supported_plugins/CPU.md index 1ca603e92f6c06..df17fd10f0d57c 100644 --- a/docs/OV_Runtime_UG/supported_plugins/CPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/CPU.md @@ -279,7 +279,7 @@ Stateful Models The CPU plugin supports stateful models without any limitations. -For details, see :doc:`stateful models guide `. +For details, see :doc:`stateful models guide `. Supported Properties ########################################################### diff --git a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md index 0ee460106cb5a2..ce1953633fc5cb 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md +++ b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md @@ -48,7 +48,7 @@ The table below demonstrates support of key features by OpenVINO device plugins. 
:doc:`Dynamic shapes ` Yes Partial No No :doc:`Import/Export ` Yes No Yes No :doc:`Preprocessing acceleration ` Yes Yes No Partial - :doc:`Stateful models ` Yes No Yes No + :doc:`Stateful models ` Yes No Yes No :doc:`Extensibility ` Yes Yes No No ========================================================================================= =============== =============== =============== ======================== diff --git a/docs/OV_Runtime_UG/supported_plugins/GNA.md b/docs/OV_Runtime_UG/supported_plugins/GNA.md index 15967d6f9fcb8b..7faace9e172d45 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GNA.md +++ b/docs/OV_Runtime_UG/supported_plugins/GNA.md @@ -209,7 +209,7 @@ To compile a model, use either :doc:`compile Tool `. +GNA plugin natively supports stateful models. For more details on such models, refer to the :doc:`Stateful models `. .. note:: diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md index dc73deb70965d0..0eac844e4c7701 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md @@ -45,46 +45,85 @@ To create the ``ov::RemoteContext`` object for user context, explicitly provide of ``ov::RemoteContext`` derived classes. -.. tab:: Linux +.. tab:: Linux/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from VADisplay .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_va_display + :fragment: [context_from_va_display] -.. tab:: Windows +.. tab:: Windows/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from ID3D11Device .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_d3d_device + :fragment: [context_from_d3d_device] +.. tab:: Linux/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from VADisplay + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_va_display] + +.. tab:: Windows/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from ID3D11Device + + .. 
doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_d3d_device] Getting RemoteContext from the Plugin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -96,19 +135,33 @@ Once the plugin options have been changed, the internal context is replaced by t To request the current default context of the plugin, use one of the following methods: +.. tab:: C++ -.. tab:: Get context from Core + .. tab:: Get context from Core - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_core + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_core] -.. tab:: Get context from compiled model + .. tab:: Get context from compiled model - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_model + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_model] + +.. tab:: C + + .. tab:: Get context from Core + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_core] + .. tab:: Get context from compiled model + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_model] Memory Sharing Between Application and GPU Plugin ########################################################### @@ -116,70 +169,140 @@ Memory Sharing Between Application and GPU Plugin The classes that implement the ``ov::RemoteTensor`` interface are the wrappers for native API memory handles (which can be obtained from them at any time). -To create a shared tensor from a native memory handle, use dedicated ``create_tensor``or ``create_tensor_nv12`` methods +To create a shared tensor from a native memory handle, use dedicated ``create_tensor`` or ``create_tensor_nv12`` methods of the ``ov::RemoteContext`` sub-classes. ``ov::intel_gpu::ocl::ClContext`` has multiple overloads of ``create_tensor`` methods which allow to wrap pre-allocated native handles with the ``ov::RemoteTensor`` -object or request plugin to allocate specific device memory. For more details, see the code snippets below: +object or request plugin to allocate specific device memory. There also provides C APIs to do the same things with C++ APIs. +For more details, see the code snippets below: .. tab-set:: - .. tab-item:: Wrap native handles + .. tab-item:: Wrap native handles/C++ + :sync: wrap-native-handles .. tab-set:: .. tab-item:: USM pointer + :sync: usm-pointer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_usm_pointer + :language: cpp + :fragment: [wrap_usm_pointer] .. tab-item:: cl_mem + :sync: cl_mem .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_mem + :language: cpp + :fragment: [wrap_cl_mem] .. tab-item:: cl::Buffer + :sync: buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_buffer + :language: cpp + :fragment: [wrap_cl_buffer] .. tab-item:: cl::Image2D + :sync: image2D .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_image + :language: cpp + :fragment: [wrap_cl_image] .. tab-item:: biplanar NV12 surface + :sync: biplanar .. 
doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_nv12_surface + :language: cpp + :fragment: [wrap_nv12_surface] - .. tab-item:: Allocate device memory + .. tab-item:: Allocate device memory/C++ + :sync: allocate-device-memory .. tab-set:: - + .. tab-item:: USM host memory + :sync: usm-host-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_host + :language: cpp + :fragment: [allocate_usm_host] .. tab-item:: USM device memory + :sync: usm-device-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_device + :language: cpp + :fragment: [allocate_usm_device] .. tab-item:: cl::Buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_cl_buffer + :language: cpp + :fragment: [allocate_cl_buffer] + +.. tab-set:: + + .. tab-item:: Wrap native handles/C + :sync: wrap-native-handles + + .. tab-set:: + + .. tab-item:: USM pointer + :sync: usm-pointer + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_usm_pointer] + + .. tab-item:: cl_mem + :sync: cl_mem + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_mem] + + .. tab-item:: cl::Buffer + :sync: buffer + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_buffer] + + .. tab-item:: cl::Image2D + :sync: image2D + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_image] + + .. tab-item:: biplanar NV12 surface + :sync: biplanar + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [create_nv12_surface] + + .. tab-item:: Allocate device memory/C + :sync: allocate-device-memory + + .. tab-set:: + + .. tab-item:: USM host memory + :sync: usm-host-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_host] + + .. tab-item:: USM device memory + :sync: usm-device-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_device] The ``ov::intel_gpu::ocl::D3DContext`` and ``ov::intel_gpu::ocl::VAContext`` classes are derived from ``ov::intel_gpu::ocl::ClContext``. Therefore, they provide the functionality described above and extend it @@ -202,9 +325,17 @@ should be added before model compilation: .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: [init_preproc] + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [init_preproc] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [init_preproc] .. tab:: single-plane @@ -228,21 +359,29 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: single_batch + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [single_batch] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [single_batch] .. tab:: single-plane .. 
doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: Multiple Batches @@ -250,19 +389,19 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: single-plane .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] I420 color format can be processed in a similar way @@ -283,7 +422,7 @@ on waiting for the completion of inference. The pseudo-code may look as follows: .. doxygensnippet:: docs/snippets/gpu/queue_sharing.cpp :language: cpp - :fragment: queue_sharing + :fragment: [queue_sharing] Limitations @@ -326,20 +465,27 @@ To see pseudo-code of usage examples, refer to the sections below. .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_get_from_ov + :fragment: [context_sharing_get_from_ov] .. dropdown:: Running GPU Plugin Inference within User-Supplied Shared Context .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_user_handle + :fragment: [context_sharing_user_handle] .. dropdown:: Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux - .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp - :language: cpp - :fragment: context_sharing_va + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp + :language: cpp + :fragment: [context_sharing_va] + + .. tab:: C + .. 
doxygensnippet:: docs/snippets/gpu/context_sharing_va_c.cpp + :language: c + :fragment: [context_sharing_va] See Also ####################################### diff --git a/docs/img/BASIC_FLOW_IE_C.svg b/docs/_static/images/BASIC_FLOW_IE_C.svg similarity index 100% rename from docs/img/BASIC_FLOW_IE_C.svg rename to docs/_static/images/BASIC_FLOW_IE_C.svg diff --git a/docs/img/batch_device.svg b/docs/_static/images/batch_device.svg similarity index 100% rename from docs/img/batch_device.svg rename to docs/_static/images/batch_device.svg diff --git a/docs/img/caching_enabled.svg b/docs/_static/images/caching_enabled.svg similarity index 100% rename from docs/img/caching_enabled.svg rename to docs/_static/images/caching_enabled.svg diff --git a/docs/img/caching_times.svg b/docs/_static/images/caching_times.svg similarity index 100% rename from docs/img/caching_times.svg rename to docs/_static/images/caching_times.svg diff --git a/docs/OV_Runtime_UG/img/configuration_dialog.png b/docs/_static/images/configuration_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/configuration_dialog.png rename to docs/_static/images/configuration_dialog.png diff --git a/docs/img/cpu_execution_conventional_approach.svg b/docs/_static/images/cpu_execution_conventional_approach.svg similarity index 100% rename from docs/img/cpu_execution_conventional_approach.svg rename to docs/_static/images/cpu_execution_conventional_approach.svg diff --git a/docs/img/cpu_execution_streams.svg b/docs/_static/images/cpu_execution_streams.svg similarity index 100% rename from docs/img/cpu_execution_streams.svg rename to docs/_static/images/cpu_execution_streams.svg diff --git a/docs/img/cpu_execution_streams_2.svg b/docs/_static/images/cpu_execution_streams_2.svg similarity index 100% rename from docs/img/cpu_execution_streams_2.svg rename to docs/_static/images/cpu_execution_streams_2.svg diff --git a/tools/pot/docs/images/default_quantization_flow.svg b/docs/_static/images/default_quantization_flow.svg similarity index 100% rename from tools/pot/docs/images/default_quantization_flow.svg rename to docs/_static/images/default_quantization_flow.svg diff --git a/docs/img/deployment_full.svg b/docs/_static/images/deployment_full.svg similarity index 100% rename from docs/img/deployment_full.svg rename to docs/_static/images/deployment_full.svg diff --git a/docs/img/deployment_simplified.svg b/docs/_static/images/deployment_simplified.svg similarity index 100% rename from docs/img/deployment_simplified.svg rename to docs/_static/images/deployment_simplified.svg diff --git a/docs/img/gapi_development_workflow.png b/docs/_static/images/gapi_development_workflow.png similarity index 100% rename from docs/img/gapi_development_workflow.png rename to docs/_static/images/gapi_development_workflow.png diff --git a/docs/img/gapi_face_analytics_pipeline.png b/docs/_static/images/gapi_face_analytics_pipeline.png similarity index 100% rename from docs/img/gapi_face_analytics_pipeline.png rename to docs/_static/images/gapi_face_analytics_pipeline.png diff --git a/docs/img/gapi_face_beautification_algorithm.png b/docs/_static/images/gapi_face_beautification_algorithm.png similarity index 100% rename from docs/img/gapi_face_beautification_algorithm.png rename to docs/_static/images/gapi_face_beautification_algorithm.png diff --git a/docs/img/gapi_face_beautification_example.jpg b/docs/_static/images/gapi_face_beautification_example.jpg similarity index 100% rename from docs/img/gapi_face_beautification_example.jpg rename to 
docs/_static/images/gapi_face_beautification_example.jpg diff --git a/docs/img/gapi_kernel_implementation_hierarchy.png b/docs/_static/images/gapi_kernel_implementation_hierarchy.png similarity index 100% rename from docs/img/gapi_kernel_implementation_hierarchy.png rename to docs/_static/images/gapi_kernel_implementation_hierarchy.png diff --git a/docs/img/gapi_programming_model.png b/docs/_static/images/gapi_programming_model.png similarity index 100% rename from docs/img/gapi_programming_model.png rename to docs/_static/images/gapi_programming_model.png diff --git a/docs/img/large_batch_approach.svg b/docs/_static/images/large_batch_approach.svg similarity index 100% rename from docs/img/large_batch_approach.svg rename to docs/_static/images/large_batch_approach.svg diff --git a/docs/img/nncf_workflow.svg b/docs/_static/images/nncf_workflow.svg similarity index 100% rename from docs/img/nncf_workflow.svg rename to docs/_static/images/nncf_workflow.svg diff --git a/tools/pot/docs/range_supervision/images/img_combined_2.png b/docs/_static/images/range_supervision/img_combined_2.png similarity index 100% rename from tools/pot/docs/range_supervision/images/img_combined_2.png rename to docs/_static/images/range_supervision/img_combined_2.png diff --git a/tools/pot/docs/range_supervision/images/scheme3.svg b/docs/_static/images/range_supervision/scheme3.svg similarity index 100% rename from tools/pot/docs/range_supervision/images/scheme3.svg rename to docs/_static/images/range_supervision/scheme3.svg diff --git a/docs/OV_Runtime_UG/img/selection_dialog.png b/docs/_static/images/selection_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/selection_dialog.png rename to docs/_static/images/selection_dialog.png diff --git a/docs/img/synch-vs-asynch.svg b/docs/_static/images/synch-vs-asynch.svg similarity index 100% rename from docs/img/synch-vs-asynch.svg rename to docs/_static/images/synch-vs-asynch.svg diff --git a/docs/api/ie_python_api/api.rst b/docs/api/ie_python_api/api.rst index c06cc2b2fb8c80..5faa85f4e05b8e 100644 --- a/docs/api/ie_python_api/api.rst +++ b/docs/api/ie_python_api/api.rst @@ -73,6 +73,12 @@ OpenVINO Python API openvino.runtime.opset10 +.. autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + + openvino.runtime.opset11 + .. autosummary:: :toctree: _autosummary :template: custom-module-template.rst @@ -95,4 +101,4 @@ OpenVINO Python API :maxdepth: 2 :hidden: - compatibility \ No newline at end of file + compatibility diff --git a/docs/benchmarks/performance_benchmarks.md b/docs/benchmarks/performance_benchmarks.md index 6c00b7df02f1ee..d63431ac5d9f33 100644 --- a/docs/benchmarks/performance_benchmarks.md +++ b/docs/benchmarks/performance_benchmarks.md @@ -10,17 +10,25 @@ openvino_docs_MO_DG_Getting_Performance_Numbers -@endsphinxdirective - -The [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) helps accelerate deep learning inference across a variety of Intel® processors and accelerators. +The `Intel® Distribution of OpenVINO™ toolkit `__ +helps accelerate deep learning inference across a variety of Intel® processors and accelerators. -The benchmark results below demonstrate high performance gains on several public neural networks on multiple Intel® CPUs, GPUs and GNAs covering a broad performance range. 
The results may be helpful when deciding which hardware is best for your applications or to plan AI workload on the Intel computing already included in your solutions. +The benchmark results presented here demonstrate high performance gains on several public neural networks on multiple Intel® CPUs, +GPUs, and GNAs covering a broad performance range. The results may be helpful when deciding which hardware is best for your +applications or to plan AI workload on the Intel computing already included in your solutions. Benchmarks are available for: -* [Intel® Distribution of OpenVINO™ toolkit](performance_benchmarks_openvino.md). +* :doc:`Intel® Distribution of OpenVINO™ toolkit `. + +You can also test performance for your system yourself, following the guide on :doc:`getting performance numbers `. +Performance of a particular application can also be evaluated virtually using `Intel® DevCloud for the Edge `__. +It is a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. +To learn more about it, visit `the website `__ +or `create an account `__. -You can also test performance for your system yourself, following the guide on [getting performance numbers](../MO_DG/prepare_model/Getting_performance_numbers.md). -Performance of a particular application can also be evaluated virtually using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/). It is a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. To learn more about it, visit [the website](https://www.intel.com/content/www/us/en/developer/tools/devcloud/edge/overview.html) or [create an account](https://www.intel.com/content/www/us/en/forms/idz/devcloud-registration.html?tgt=https://www.intel.com/content/www/us/en/secure/forms/devcloud-enrollment/account-provisioning.html). + +@endsphinxdirective + diff --git a/docs/benchmarks/performance_benchmarks_faq.md b/docs/benchmarks/performance_benchmarks_faq.md index 7972034e677b96..8ce96e4768a7f0 100644 --- a/docs/benchmarks/performance_benchmarks_faq.md +++ b/docs/benchmarks/performance_benchmarks_faq.md @@ -6,7 +6,7 @@ .. dropdown:: How often do performance benchmarks get updated? New performance benchmarks are typically published on every - `major.minor` release of the Intel® Distribution of OpenVINO™ toolkit. + ``major.minor`` release of the Intel® Distribution of OpenVINO™ toolkit. .. dropdown:: Where can I find the models used in the performance benchmarks? @@ -22,7 +22,7 @@ All of the performance benchmarks are generated using the open-source tool within the Intel® Distribution of OpenVINO™ toolkit - called `benchmark_app`. This tool is available + called ``benchmark_app``. This tool is available :doc:`for C++ apps `. as well as :doc:`for Python apps `. 
@@ -42,63 +42,63 @@ - Public Network - Task - Input Size - * - `bert-base-cased `_ + * - `bert-base-cased `__ - BERT - question / answer - 124 - * - `bert-large-uncased-whole-word-masking-squad-int8-0001 `_ + * - `bert-large-uncased-whole-word-masking-squad-int8-0001 `__ - BERT-large - question / answer - 384 - * - `deeplabv3-TF `_ + * - `deeplabv3-TF `__ - DeepLab v3 Tf - semantic segmentation - 513x513 - * - `densenet-121-TF `_ + * - `densenet-121-TF `__ - Densenet-121 Tf - classification - 224x224 - * - `efficientdet-d0 `_ + * - `efficientdet-d0 `__ - Efficientdet - classification - 512x512 - * - `faster_rcnn_resnet50_coco-TF `_ + * - `faster_rcnn_resnet50_coco-TF `__ - Faster RCNN Tf - object detection - 600x1024 - * - `inception-v4-TF `_ + * - `inception-v4-TF `__ - Inception v4 Tf (aka GoogleNet-V4) - classification - 299x299 - * - `mobilenet-ssd-CF `_ + * - `mobilenet-ssd-CF `__ - SSD (MobileNet)_COCO-2017_Caffe - object detection - 300x300 - * - `mobilenet-v2-pytorch `_ + * - `mobilenet-v2-pytorch `__ - Mobilenet V2 PyTorch - classification - 224x224 - * - `resnet-18-pytorch `_ + * - `resnet-18-pytorch `__ - ResNet-18 PyTorch - classification - 224x224 - * - `resnet-50-TF `_ + * - `resnet-50-TF `__ - ResNet-50_v1_ILSVRC-2012 - classification - 224x224 - * - `ssd-resnet34-1200-onnx `_ + * - `ssd-resnet34-1200-onnx `__ - ssd-resnet34 onnx model - object detection - 1200x1200 - * - `unet-camvid-onnx-0001 `_ + * - `unet-camvid-onnx-0001 `__ - U-Net - semantic segmentation - 368x480 - * - `yolo-v3-tiny-tf `_ + * - `yolo-v3-tiny-tf `__ - YOLO v3 Tiny - object detection - 416x416 - * - `yolo_v4-TF `_ + * - `yolo_v4-TF `__ - Yolo-V4 TF - object detection - 608x608 @@ -107,16 +107,16 @@ .. dropdown:: Where can I purchase the specific hardware used in the benchmarking? Intel partners with vendors all over the world. For a list of Hardware Manufacturers, see the - `Intel® AI: In Production Partners & Solutions Catalog `_. + `Intel® AI: In Production Partners & Solutions Catalog `__. For more details, see the :doc:`Supported Devices `. documentation. Before purchasing any hardware, you can test and run - models remotely, using `Intel® DevCloud for the Edge `_. + models remotely, using `Intel® DevCloud for the Edge `__. .. dropdown:: How can I optimize my models for better performance or accuracy? Set of guidelines and recommendations to optimize models are available in the :doc:`optimization guide `. - Join the conversation in the `Community Forum `_ for further support. + Join the conversation in the `Community Forum `__ for further support. .. dropdown:: Why are INT8 optimized models used for benchmarking on CPUs with no VNNI support? diff --git a/docs/benchmarks/performance_benchmarks_openvino.md b/docs/benchmarks/performance_benchmarks_openvino.md index 639f1c38a8dd64..2bc5d5774edd69 100644 --- a/docs/benchmarks/performance_benchmarks_openvino.md +++ b/docs/benchmarks/performance_benchmarks_openvino.md @@ -73,7 +73,7 @@ Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are ba Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of December 13, 2022 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure. -Performance varies by use, configuration and other factors. Learn more at :ref:`www.intel.com/PerformanceIndex`. 
+Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `__. Your costs and results may vary. @@ -81,4 +81,8 @@ Intel optimizations, for Intel compilers or other products, may not optimize to © Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. -@endsphinxdirective \ No newline at end of file + + +@endsphinxdirective + + diff --git a/docs/benchmarks/performance_benchmarks_ovms.md b/docs/benchmarks/performance_benchmarks_ovms.md index 5626aa731323cd..be46a6d7f9cd5b 100644 --- a/docs/benchmarks/performance_benchmarks_ovms.md +++ b/docs/benchmarks/performance_benchmarks_ovms.md @@ -3,7 +3,24 @@ @endsphinxdirective # OpenVINO™ Model Server Benchmark Results {#openvino_docs_performance_benchmarks_ovms} -OpenVINO™ Model Server is an open-source, production-grade inference platform that exposes a set of models via a convenient inference API over gRPC or HTTP/REST. It employs the OpenVINO™ Runtime libraries from the Intel® Distribution of OpenVINO™ toolkit to extend workloads across Intel® hardware including CPU, GPU and others. + +@sphinxdirective +Click the "Benchmark Graphs" button to see the OpenVINO™ benchmark graphs. Select the models, the hardware platforms (CPU SKUs), +precision and performance index from the lists and click the “Build Graphs” button. + +.. button-link:: # + :class: ov-toolkit-benchmark-results + :color: primary + :outline: + + :material-regular:`bar_chart;1.4em` Benchmark Graphs + + +OpenVINO™ Model Server is an open-source, production-grade inference platform that exposes a set of models via a convenient inference API +over gRPC or HTTP/REST. It employs the OpenVINO™ Runtime libraries from the Intel® Distribution of OpenVINO™ toolkit to extend workloads +across Intel® hardware including CPU, GPU and others. +@endsphinxdirective + ![OpenVINO™ Model Server](../img/performance_benchmarks_ovms_01.png) @@ -21,216 +38,49 @@ OpenVINO™ Model Server is measured in multiple-client-single-server configurat * **Execution Controller** is launched on the client platform. It is responsible for synchronization of the whole measurement process, downloading metrics from the load balancer, and presenting the final report of the execution. 
-## bert-small-uncased-whole-word-masking-squad-002 (INT8) -![](../_static/benchmarks_files/ovms/bert-small-uncased-whole-word-masking-squad-002-int8.png) -## bert-small-uncased-whole-word-masking-squad-002 (FP32) -![](../_static/benchmarks_files/ovms/bert-small-uncased-whole-word-masking-squad-002-fp32.png) -## densenet-121 (INT8) -![](../_static/benchmarks_files/ovms/densenet-121-int8.png) -## densenet-121 (FP32) -![](../_static/benchmarks_files/ovms/densenet-121-fp32.png) -## efficientdet-d0 (INT8) -![](../_static/benchmarks_files/ovms/efficientdet-d0-int8.png) -## efficientdet-d0 (FP32) -![](../_static/benchmarks_files/ovms/efficientdet-d0-fp32.png) -## inception-v4 (INT8) -![](../_static/benchmarks_files/ovms/inception-v4-int8.png) -## inception-v4 (FP32) -![](../_static/benchmarks_files/ovms/inception-v4-fp32.png) -## mobilenet-ssd (INT8) -![](../_static/benchmarks_files/ovms/mobilenet-ssd-int8.png) -## mobilenet-ssd (FP32) -![](../_static/benchmarks_files/ovms/mobilenet-ssd-fp32.png) -## mobilenet-v2 (INT8) -![](../_static/benchmarks_files/ovms/mobilenet-v2-int8.png) -## mobilenet-v2 (FP32) -![](../_static/benchmarks_files/ovms/mobilenet-v2-fp32.png) -## resnet-18 (INT8) -![](../_static/benchmarks_files/ovms/resnet-18-int8.png) -## resnet-18 (FP32) -![](../_static/benchmarks_files/ovms/resnet-18-fp32.png) -## resnet-50 (INT8) -![](../_static/benchmarks_files/ovms/resnet-50-int8.png) -## resnet-50 (FP32) -![](../_static/benchmarks_files/ovms/resnet-50-fp32.png) -## ssd-resnt34-1200 (INT8) -![](../_static/benchmarks_files/ovms/ssd-resnt34-1200-int8.png) -## ssd-resnt34-1200 (FP32) -![](../_static/benchmarks_files/ovms/ssd-resnt34-1200-fp32.png) -## unet-camvid-onnx-001 (INT8) -![](../_static/benchmarks_files/ovms/unet-camvid-onnx-001-int8.png) -## unet-camvid-onnx-001 (FP32) -![](../_static/benchmarks_files/ovms/unet-camvid-onnx-001-fp32.png) -## yolo-v3-tiny (INT8) -![](../_static/benchmarks_files/ovms/yolo-v3-tiny-int8.png) -## yolo-v3-tiny (FP32) -![](../_static/benchmarks_files/ovms/yolo-v3-tiny-fp32.png) -## yolo-v4 (INT8) -![](../_static/benchmarks_files/ovms/yolo-v4-int8.png) -## yolo-v4 (FP32) -![](../_static/benchmarks_files/ovms/yolo-v4-fp32.png) - - -## Platform Configurations - -OpenVINO™ Model Server performance benchmark numbers are based on release 2022.2. Performance results are based on testing as of November 16, 2022 and may not reflect all publicly available updates. @sphinxdirective -.. dropdown:: Platform with Intel® Xeon® Platinum 8260M - - .. 
table:: - :widths: 25 25 50 - - +--------------------------+-------------------------------------------+----------------------------------------+ - | | Server Platform | Client Platform | - +==========================+===========================================+========================================+ - | Motherboard | Inspur YZMB-00882-104 NF5280M5 | Inspur YZMB-00882-104 NF5280M5 | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Memory | Samsung 16 x 16GB @ 2666 MT/s DDR4 | Kingston 16 x 16GB @ 2666 MT/s DDR4 | - +--------------------------+-------------------------------------------+----------------------------------------+ - | CPU | Intel® Xeon® Platinum 8260M CPU @ 2.40GHz | Intel® Xeon® Gold 6238M CPU @ 2.10GHz | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Selected CPU Flags | Hyper Threading, Turbo Boost, DL Boost | Hyper Threading, Turbo Boost, DL Boost | - +--------------------------+-------------------------------------------+----------------------------------------+ - | CPU Thermal Design Power | 162W | 150W | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Operating System | Ubuntu 20.04.4 LTS | Ubuntu 20.04.4 LTS | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Kernel Version | 5.4.0-107-generic | 5.4.0-107-generic | - +--------------------------+-------------------------------------------+----------------------------------------+ - | BIOS Vendor | American Megatrends Inc. | AMI | - +--------------------------+-------------------------------------------+----------------------------------------+ - | BIOS Version & Release | 4.1.16; date: 06/23/2020 | 4.1.16; date: 06/23/2020 | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Docker Version | 20.10.3 | 20.10.3 | - +--------------------------+-------------------------------------------+----------------------------------------+ - | Network Speed | 40 Gb/s | 40 Gb/s | - +--------------------------+-------------------------------------------+----------------------------------------+ - -.. dropdown:: Platform with 6238M - - .. 
table:: - :widths: 25 25 50 - - +--------------------------+-------------------------------------------+--------------------------------------------+ - | | Server Platform | Client Platform | - +==========================+===========================================+============================================+ - | Motherboard | Inspur YZMB-00882-104 NF5280M5 | Inspur YZMB-00882-104 NF5280M5 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Memory | Kingston 16 x 16GB @ 2666 MT/s DDR4 | Samsung 16 x 16GB @ 2666 MT/s DDR4 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU | Intel® Xeon® Gold 6238M CPU @ 2.10GHz | Intel® Xeon® Platinum 8260M CPU @ 2.40GHz | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Selected CPU Flags | Hyper Threading, Turbo Boost, DL Boost | Hyper Threading, Turbo Boost, DL Boost | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU Thermal Design Power | 150W | 162W | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Operating System | Ubuntu 20.04.4 LTS | Ubuntu 20.04.4 LTS | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Kernel Version | 5.4.0-107-generic | 5.4.0-107-generic | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Vendor | AMI | American Megatrends Inc. | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Version & Release | 4.1.16; date: 06/23/2020 | 4.1.16; date: 06/23/2020 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Docker Version | 20.10.3 | 20.10.3 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Network Speed | 40 Gb/s | 40 Gb/s | - +--------------------------+-------------------------------------------+--------------------------------------------+ - -.. dropdown:: Platform with Intel® Core™ i9-10920X - - .. table:: - :widths: 25 25 50 - - +--------------------------+-------------------------------------------+--------------------------------------------+ - | | Server Platform | Client Platform | - +==========================+===========================================+============================================+ - | Motherboard | ASUSTeK COMPUTER INC. PRIME X299-A II | ASUSTeK COMPUTER INC. 
PRIME Z370-P | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Memory | Corsair 4 x 16GB @ 2666 MT/s DDR4 | Corsair 4 x 16GB @ 2133 MT/s DDR4 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU | Intel® Core™ i9-10920X CPU @ 3.50GHz | Intel® Core™ i7-8700T CPU @ 2.40GHz | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Selected CPU Flags | Hyper Threading, Turbo Boost, DL Boost | Hyper Threading, Turbo Boost, DL Boost | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU Thermal Design Power | 165W | 35 W | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Operating System | Ubuntu 20.04.4 LTS | Ubuntu 20.04.4 LTS | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Kernel Version | 5.4.0-107-generic | 5.4.0-107-generic | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Vendor | American Megatrends Inc. | American Megatrends Inc. | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Version & Release | 0702; date: 06/10/2020 | 2401; date: 07/15/2019 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Docker Version | 19.03.13 | 19.03.14 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Network Speed | 10 Gb/s | 10 Gb/s | - +--------------------------+-------------------------------------------+--------------------------------------------+ - - -.. dropdown:: Platform with Intel® Core™ i7-8700T - - .. table:: - :widths: 25 25 50 - - +--------------------------+-------------------------------------------+--------------------------------------------+ - | | Server Platform | Client Platform | - +==========================+===========================================+============================================+ - | Motherboard | ASUSTeK COMPUTER INC. PRIME Z370-P | ASUSTeK COMPUTER INC. 
PRIME X299-A II | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Memory | Corsair 4 x 16GB @ 2133 MT/s DDR4 | Corsair 4 x 16GB @ 2666 MT/s DDR4 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Selected CPU Flags | Hyper Threading, Turbo Boost | Hyper Threading, Turbo Boost | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU Thermal Design Power | 35W | 165 W | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Operating System | Ubuntu 20.04.4 LTS | Ubuntu 20.04.4 LTS | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Kernel Version | 5.4.0-107-generic | 5.4.0-107-generic | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Vendor | American Megatrends Inc. | American Megatrends Inc. | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Version & Release | 2401; date: 07/15/2019 | 0702; date: 06/10/2020 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Docker Version | 19.03.14 | 19.03.13 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Network Speed | 10 Gb/s | 10 Gb/s | - +--------------------------+-------------------------------------------+--------------------------------------------+ - -.. dropdown:: Platform with Intel® Core™ i5-8500 - - .. table:: - :widths: 25 25 50 - - +--------------------------+-------------------------------------------+--------------------------------------------+ - | | Server Platform | Client Platform | - +==========================+===========================================+============================================+ - | Motherboard | ASUSTeK COMPUTER INC. PRIME Z370-A | Gigabyte Technology Co., Ltd. 
Z390 UD | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Memory | Corsair 2 x 16GB @ 2133 MT/s DDR4 | 029E 4 x 8GB @ 2400 MT/s DDR4 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i3-8100 CPU @ 3.60GHz | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Selected CPU Flags | Turbo Boost | | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | CPU Thermal Design Power | 65W | 65 W | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Operating System | Ubuntu 20.04.4 LTS | Ubuntu 20.04.1 LTS | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Kernel Version | 5.4.0-113-generic | 5.4.0-52-generic | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Vendor | American Megatrends Inc. | American Megatrends Inc. | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | BIOS Version & Release | 3004; date: 07/12/2021 | F10j; date: 09/16/2020 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Docker Version | 19.03.13 | 20.10.0 | - +--------------------------+-------------------------------------------+--------------------------------------------+ - | Network Speed | 40 Gb/s | 40 Gb/s | - +--------------------------+-------------------------------------------+--------------------------------------------+ - -@endsphinxdirective \ No newline at end of file + + + +Platform & Configurations +#################################### + +For a listing of all platforms and configurations used for testing, refer to the following: + +.. button-link:: _static/benchmarks_files/platform_list_22.3.pdf + :color: primary + :outline: + + :material-regular:`download;1.5em` Click for Hardware Platforms [PDF] + +.. button-link:: _static/benchmarks_files/OV-2022.3-system-info-detailed.xlsx + :color: primary + :outline: + + :material-regular:`download;1.5em` Click for Configuration Details [XLSX] + +.. the files above need to be changed to the proper ones!!! + +The presented performance benchmark numbers are based on the release 2022.2 of the Intel® Distribution of OpenVINO™ toolkit. +The benchmark application loads the OpenVINO™ Runtime and executes inferences on the specified hardware (CPU, GPU or GNA). +It measures the time spent on actual inference (excluding any pre or post processing) and then reports on the inferences per second (or Frames Per Second). + +Disclaimers +#################################### + +Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2022.3. + +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of November 16, 2022 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure. 
+ +Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `__. + +Your costs and results may vary. + +Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products. + +© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. + + +@endsphinxdirective diff --git a/docs/benchmarks/performance_int8_vs_fp32.md b/docs/benchmarks/performance_int8_vs_fp32.md index 7faed00e38e8e1..b90ac906e56d13 100644 --- a/docs/benchmarks/performance_int8_vs_fp32.md +++ b/docs/benchmarks/performance_int8_vs_fp32.md @@ -1,12 +1,16 @@ # Model Accuracy {#openvino_docs_performance_int8_vs_fp32} -The following table presents the absolute accuracy drop calculated as the accuracy difference between FP32 and INT8 representations of a model on two platforms + +@sphinxdirective + +The following table presents the absolute accuracy drop calculated as the accuracy difference +between FP32 and INT8 representations of a model on two platforms. * A - Intel® Core™ i9-9000K (AVX2) * B - Intel® Xeon® 6338, (VNNI) * C - Intel® Flex-170 -@sphinxdirective + .. list-table:: Model Accuracy :header-rows: 1 diff --git a/docs/dlstreamer.md b/docs/dlstreamer.md index eb21196221ff0c..13214a98a60163 100644 --- a/docs/dlstreamer.md +++ b/docs/dlstreamer.md @@ -1,5 +1,7 @@ # Intel® Deep Learning Streamer (Intel® DL Streamer) {#openvino_docs_dlstreamer} +@sphinxdirective + Intel® DL Streamer is a streaming media analytics framework, based on GStreamer* multimedia framework, for creating complex media analytics pipelines. Intel® DL Streamer makes Media analytics easy: @@ -9,10 +11,13 @@ Intel® DL Streamer makes Media analytics easy: * Analyze video and audio streams, create actionable results, capture results, and send them to the cloud * Leverage the efficiency and computational power of Intel hardware platforms -Go to [Intel® DL Streamer documentation website](https://dlstreamer.github.io) for information on how to download, install, and use. +Go to `Intel® DL Streamer documentation website `__ for information on how to download, install, and use. **Media analytics** is the analysis of audio & video streams to detect, classify, track, identify and count objects, events and people. The analyzed results can be used to take actions, coordinate events, identify patterns and gain insights across multiple domains. **Media analytics pipelines** transform media streams into insights through audio / video processing, inference, and analytics operations across multiple IP blocks. -\* Other names and brands may be claimed as the property of others. \ No newline at end of file +\* Other names and brands may be claimed as the property of others. + +@endsphinxdirective + diff --git a/docs/gapi/face_beautification.md b/docs/gapi/face_beautification.md index 8fff5ba95a267c..085ef40a041a9b 100644 --- a/docs/gapi/face_beautification.md +++ b/docs/gapi/face_beautification.md @@ -1,434 +1,510 @@ # Implementing a Face Beautification Algorithm {#openvino_docs_gapi_face_beautification} -## Introduction +@sphinxdirective + +Introduction +############ + In this tutorial you will learn: * Basics of a sample face beautification algorithm; * How to infer different networks inside a pipeline with G-API; * How to run a G-API pipeline on a video stream. 
-## Prerequisites +Prerequisites +############# + This sample requires: * PC with GNU/Linux or Microsoft Windows (Apple macOS is supported but was not tested) -* OpenCV 4.2 or higher built with [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) (building with [Intel® TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) is a plus) -* The following pre-trained models from the [Open Model Zoo](@ref omz_models_group_intel) - * [face-detection-adas-0001](@ref omz_models_model_face_detection_adas_0001) - * [facial-landmarks-35-adas-0002](@ref omz_models_model_facial_landmarks_35_adas_0002) +* OpenCV 4.2 or higher built with `Intel® Distribution of OpenVINO™ Toolkit `__ (building with `Intel® TBB `__ is a plus) +* The following pre-trained models from the :doc:`Open Model Zoo ` -To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader) tool. + * `face-detection-adas-0001 `__ + * `facial-landmarks-35-adas-0002 `__ -## Face Beautification Algorithm -We will implement a simple face beautification algorithm using a combination of modern Deep Learning techniques and traditional Computer Vision. The general idea behind the algorithm is to make face skin smoother while preserving face features like eyes or a mouth contrast. The algorithm identifies parts of the face using a DNN inference, applies different filters to the parts found, and then combines it into the final result using basic image arithmetics: +To download the models from the Open Model Zoo, use the :doc:`Model Downloader ` tool. + +Face Beautification Algorithm +############################# -![Face Beautification Algorithm](../img/gapi_face_beautification_algorithm.png) +We will implement a simple face beautification algorithm using a combination of modern Deep Learning techniques and traditional Computer Vision. The general idea behind the algorithm is to make face skin smoother while preserving face features like eyes or a mouth contrast. The algorithm identifies parts of the face using a DNN inference, applies different filters to the parts found, and then combines it into the final result using basic image arithmetics: +.. image:: _static/images/gapi_face_beautification_algorithm.png Briefly the algorithm is described as follows: -- Input image \f$I\f$ is passed to unsharp mask and bilateral filters - (\f$U\f$ and \f$L\f$ respectively); -- Input image \f$I\f$ is passed to an SSD-based face detector; -- SSD result (a \f$[1 \times 1 \times 200 \times 7]\f$ blob) is parsed and converted to an array of faces; + +- Input image :math:`I` is passed to unsharp mask and bilateral filters + (\f$U\f$ and :math:`L` respectively); +- Input image :math:`I` is passed to an SSD-based face detector; +- SSD result (a :math:`[1 \times 1 \times 200 \times 7]` blob) is parsed and converted to an array of faces; - Every face is passed to a landmarks detector; - Based on landmarks found for every face, three image masks are generated: - - A background mask \f$b\f$ -- indicating which areas from the original image to keep as-is; - - A face part mask \f$p\f$ -- identifying regions to preserve (sharpen). - - A face skin mask \f$s\f$ -- identifying regions to blur; -- The final result \f$O\f$ is a composition of features above calculated as \f$O = b*I + p*U + s*L\f$. 
+ + - A background mask :math:`b` -- indicating which areas from the original image to keep as-is; + - A face part mask :math:`p` -- identifying regions to preserve (sharpen). + - A face skin mask :math:`s` -- identifying regions to blur; +- The final result :math:`O` is a composition of features above calculated as :math:`O = b\*I + p\*U + s\*L`. Generating face element masks based on a limited set of features (just 35 per face, including all its parts) is not very trivial and is described in the sections below. -## Constructing a G-API Pipeline +Constructing a G-API Pipeline +############################# + +Declare Deep Learning Topologies +++++++++++++++++++++++++++++++++ -### Declare Deep Learning Topologies This sample is using two DNN detectors. Every network takes one input and produces one output. In G-API, networks are defined with macro G_API_NET(): -```cpp -G_API_NET(FaceDetector, , "face_detector"); -G_API_NET(LandmDetector, , "landm_detector"); -``` + +.. code-block:: cpp + + G_API_NET(FaceDetector, , "face_detector"); + G_API_NET(LandmDetector, , "landm_detector"); + To get more information, see Declaring Deep Learning topologies described in the "Face Analytics pipeline" tutorial. -### Describe the Processing Graph +Describe the Processing Graph ++++++++++++++++++++++++++++++ + The code below generates a graph for the algorithm above: -```cpp -cv::GComputation pipeline([=]() -{ - cv::GMat gimgIn; // input - cv::GMat faceOut = cv::gapi::infer(gimgIn); - GArrayROI garRects = custom::GFacePostProc::on(faceOut, gimgIn, config::kConfThresh); // post-proc - cv::GArray landmOut = cv::gapi::infer(garRects, gimgIn); - cv::GArray garElems; // | - cv::GArray garJaws; // |output arrays - std::tie(garElems, garJaws) = custom::GLandmPostProc::on(landmOut, garRects); // post-proc - cv::GArray garElsConts; // face elements - cv::GArray garFaceConts; // whole faces - std::tie(garElsConts, garFaceConts) = custom::GGetContours::on(garElems, garJaws); // interpolation - cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, garElsConts); // | - cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, config::kGKernelSize, // | - config::kGSigma); // | - cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, garFaceConts); // | - cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, config::kGKernelSize, // | - config::kGSigma); // |draw masks - // The first argument in mask() is Blur as we want to subtract from // | - // BlurG the next step: // | - cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, mskSharpG); // | - cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; // | - cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, cv::THRESH_BINARY); // | - cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); // | - cv::GMat gimgBilat = custom::GBilatFilter::on(gimgIn, config::kBSize, - config::kBSigmaCol, config::kBSigmaSp); - cv::GMat gimgSharp = custom::unsharpMask(gimgIn, config::kUnshSigma, - config::kUnshStrength); - // Applying the masks - // Custom function mask3C() should be used instead of just gapi::mask() - // as mask() provides CV_8UC1 source only (and we have CV_8U3C) - cv::GMat gimgBilatMasked = custom::mask3C(gimgBilat, mskBlurFinal); - cv::GMat gimgSharpMasked = custom::mask3C(gimgSharp, mskSharpG); - cv::GMat gimgInMasked = custom::mask3C(gimgIn, mskNoFaces); - cv::GMat gimgBeautif = gimgBilatMasked + gimgSharpMasked + gimgInMasked; - return cv::GComputation(cv::GIn(gimgIn), cv::GOut(gimgBeautif, - cv::gapi::copy(gimgIn), - garFaceConts, - 
garElsConts, - garRects)); -}); -``` -The resulting graph is a mixture of G-API's standard operations, user-defined operations (namespace custom::), and DNN inference. The generic function `cv::gapi::infer<>()` allows you to trigger inference within the pipeline; networks to infer are specified as template parameters. The sample code is using two versions of `cv::gapi::infer<>()`: + +.. code-block:: cpp + + cv::GComputation pipeline([=]() + { + cv::GMat gimgIn; // input + cv::GMat faceOut = cv::gapi::infer(gimgIn); + GArrayROI garRects = custom::GFacePostProc::on(faceOut, gimgIn, config::kConfThresh); // post-proc + cv::GArray landmOut = cv::gapi::infer(garRects, gimgIn); + cv::GArray garElems; // | + cv::GArray garJaws; // |output arrays + std::tie(garElems, garJaws) = custom::GLandmPostProc::on(landmOut, garRects); // post-proc + cv::GArray garElsConts; // face elements + cv::GArray garFaceConts; // whole faces + std::tie(garElsConts, garFaceConts) = custom::GGetContours::on(garElems, garJaws); // interpolation + cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, garElsConts); // | + cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, config::kGKernelSize, // | + config::kGSigma); // | + cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, garFaceConts); // | + cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, config::kGKernelSize, // | + config::kGSigma); // |draw masks + // The first argument in mask() is Blur as we want to subtract from // | + // BlurG the next step: // | + cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, mskSharpG); // | + cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; // | + cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, cv::THRESH_BINARY); // | + cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); // | + cv::GMat gimgBilat = custom::GBilatFilter::on(gimgIn, config::kBSize, + config::kBSigmaCol, config::kBSigmaSp); + cv::GMat gimgSharp = custom::unsharpMask(gimgIn, config::kUnshSigma, + config::kUnshStrength); + // Applying the masks + // Custom function mask3C() should be used instead of just gapi::mask() + // as mask() provides CV_8UC1 source only (and we have CV_8U3C) + cv::GMat gimgBilatMasked = custom::mask3C(gimgBilat, mskBlurFinal); + cv::GMat gimgSharpMasked = custom::mask3C(gimgSharp, mskSharpG); + cv::GMat gimgInMasked = custom::mask3C(gimgIn, mskNoFaces); + cv::GMat gimgBeautif = gimgBilatMasked + gimgSharpMasked + gimgInMasked; + return cv::GComputation(cv::GIn(gimgIn), cv::GOut(gimgBeautif, + cv::gapi::copy(gimgIn), + garFaceConts, + garElsConts, + garRects)); + }); + + +The resulting graph is a mixture of G-API's standard operations, user-defined operations (namespace custom::), and DNN inference. The generic function ``cv::gapi::infer<>()`` allows you to trigger inference within the pipeline; networks to infer are specified as template parameters. The sample code is using two versions of ``cv::gapi::infer<>()``: * A frame-oriented one is used to detect faces on the input frame. -* An ROI-list oriented one is used to run landmarks inference on a list of faces – this version produces an array of landmarks per every face. -More on this in "Face Analytics pipeline" ([Building a GComputation](@ref gapi_ifd_gcomputation) section). - -### Unsharp mask in G-API -The unsharp mask \f$U\f$ for image \f$I\f$ is defined as: - -\f[U = I - s * L(M(I)),\f] - -where \f$M()\f$ is a median filter, \f$L()\f$ is the Laplace operator, and \f$s\f$ is a strength coefficient. 
While G-API doesn't provide this function out-of-the-box, it is expressed naturally with the existing G-API operations:
-
-```cpp
-inline cv::GMat custom::unsharpMask(const cv::GMat &src,
-                                    const int sigma,
-                                    const float strength)
-{
-    cv::GMat blurred   = cv::gapi::medianBlur(src, sigma);
-    cv::GMat laplacian = custom::GLaplacian::on(blurred, CV_8U);
-    return (src - (laplacian * strength));
-}
-```
+* An ROI-list oriented one is used to run landmarks inference on a list of faces – this version produces an array of landmarks per every face. More on this in "Face Analytics pipeline" (:ref:`Building a GComputation <gapi_ifd_gcomputation>` section).
+
+Unsharp mask in G-API
++++++++++++++++++++++
+
+The unsharp mask :math:`U` for image :math:`I` is defined as:
+
+.. math::
+
+   U = I - s * L(M(I))
+
+where :math:`M()` is a median filter, :math:`L()` is the Laplace operator, and :math:`s` is a strength coefficient. While G-API doesn't provide this function out-of-the-box, it is expressed naturally with the existing G-API operations:
+
+.. code-block:: cpp
+
+   inline cv::GMat custom::unsharpMask(const cv::GMat &src,
+                                       const int sigma,
+                                       const float strength)
+   {
+       cv::GMat blurred   = cv::gapi::medianBlur(src, sigma);
+       cv::GMat laplacian = custom::GLaplacian::on(blurred, CV_8U);
+       return (src - (laplacian * strength));
+   }
+
 Note that the code snippet above is a regular C++ function defined with G-API types. Users can write functions like this to simplify graph construction; when called, this function just puts the relevant nodes to the pipeline it is used in.
-## Custom Operations
+Custom Operations
+#################
+
 The face beautification graph is using custom operations extensively. This chapter focuses on the most interesting kernels; refer to G-API Kernel API for general information on defining operations and implementing kernels in G-API.
-### Face detector post-processing
+Face detector post-processing
++++++++++++++++++++++++++++++
+
 A face detector output is converted to an array of faces with the following kernel:
-```cpp
-using VectorROI = std::vector<cv::Rect>;
-GAPI_OCV_KERNEL(GCPUFacePostProc, GFacePostProc)
-{
-    static void run(const cv::Mat &inDetectResult,
-                    const cv::Mat &inFrame,
-                    const float faceConfThreshold,
-                    VectorROI &outFaces)
-    {
-        const int kObjectSize = 7;
-        const int imgCols = inFrame.size().width;
-        const int imgRows = inFrame.size().height;
-        const cv::Rect borders({0, 0}, inFrame.size());
-        outFaces.clear();
-        const int numOfDetections = inDetectResult.size[2];
-        const float *data = inDetectResult.ptr<float>();
-        for (int i = 0; i < numOfDetections; i++)
-        {
-            const float faceId = data[i * kObjectSize + 0];
-            if (faceId < 0.f)  // indicates the end of detections
-            {
-                break;
-            }
-            const float faceConfidence = data[i * kObjectSize + 2];
-            // We can cut detections by the `conf` field
-            // to avoid mistakes of the detector.
- if (faceConfidence > faceConfThreshold) - { - const float left = data[i * kObjectSize + 3]; - const float top = data[i * kObjectSize + 4]; - const float right = data[i * kObjectSize + 5]; - const float bottom = data[i * kObjectSize + 6]; - // These are normalized coordinates and are between 0 and 1; - // to get the real pixel coordinates we should multiply it by - // the image sizes respectively to the directions: - cv::Point tl(toIntRounded(left * imgCols), - toIntRounded(top * imgRows)); - cv::Point br(toIntRounded(right * imgCols), - toIntRounded(bottom * imgRows)); - outFaces.push_back(cv::Rect(tl, br) & borders); - } - } - } -}; -``` - -### Facial Landmarks Post-Processing +.. code-block:: cpp + + using VectorROI = std::vector; + GAPI_OCV_KERNEL(GCPUFacePostProc, GFacePostProc) + { + static void run(const cv::Mat &inDetectResult, + const cv::Mat &inFrame, + const float faceConfThreshold, + VectorROI &outFaces) + { + const int kObjectSize = 7; + const int imgCols = inFrame.size().width; + const int imgRows = inFrame.size().height; + const cv::Rect borders({0, 0}, inFrame.size()); + outFaces.clear(); + const int numOfDetections = inDetectResult.size[2]; + const float \*data = inDetectResult.ptr(); + for (int i = 0; i < numOfDetections; i++) + { + const float faceId = data[i \* kObjectSize + 0]; + if (faceId < 0.f) // indicates the end of detections + { + break; + } + const float faceConfidence = data[i \* kObjectSize + 2]; + // We can cut detections by the `conf` field + // to avoid mistakes of the detector. + if (faceConfidence > faceConfThreshold) + { + const float left = data[i \* kObjectSize + 3]; + const float top = data[i \* kObjectSize + 4]; + const float right = data[i \* kObjectSize + 5]; + const float bottom = data[i \* kObjectSize + 6]; + // These are normalized coordinates and are between 0 and 1; + // to get the real pixel coordinates we should multiply it by + // the image sizes respectively to the directions: + cv::Point tl(toIntRounded(left \* imgCols), + toIntRounded(top \* imgRows)); + cv::Point br(toIntRounded(right \* imgCols), + toIntRounded(bottom \* imgRows)); + outFaces.push_back(cv::Rect(tl, br) & borders); + } + } + } + }; + +Facial Landmarks Post-Processing +++++++++++++++++++++++++++++++++ + The algorithm infers locations of face elements (like the eyes, the mouth and the head contour itself) using a generic facial landmarks detector (details) from OpenVINO™ Open Model Zoo. However, the detected landmarks as-is are not enough to generate masks — this operation requires regions of interest on the face represented by closed contours, so some interpolation is applied to get them. 
This landmarks processing and interpolation is performed by the following kernel: -```cpp -GAPI_OCV_KERNEL(GCPUGetContours, GGetContours) -{ - static void run(const std::vector &vctPtsFaceElems, // 18 landmarks of the facial elements - const std::vector &vctCntJaw, // 17 landmarks of a jaw - std::vector &vctElemsContours, - std::vector &vctFaceContours) - { - size_t numFaces = vctCntJaw.size(); - CV_Assert(numFaces == vctPtsFaceElems.size()); - CV_Assert(vctElemsContours.size() == 0ul); - CV_Assert(vctFaceContours.size() == 0ul); - // vctFaceElemsContours will store all the face elements' contours found - // in an input image, namely 4 elements (two eyes, nose, mouth) for every detected face: - vctElemsContours.reserve(numFaces * 4); - // vctFaceElemsContours will store all the faces' contours found in an input image: - vctFaceContours.reserve(numFaces); - Contour cntFace, cntLeftEye, cntRightEye, cntNose, cntMouth; - cntNose.reserve(4); - for (size_t i = 0ul; i < numFaces; i++) - { - // The face elements contours - // A left eye: - // Approximating the lower eye contour by half-ellipse (using eye points) and storing in cntLeftEye: - cntLeftEye = getEyeEllipse(vctPtsFaceElems[i][1], vctPtsFaceElems[i][0]); - // Pushing the left eyebrow clock-wise: - cntLeftEye.insert(cntLeftEye.end(), {vctPtsFaceElems[i][12], vctPtsFaceElems[i][13], - vctPtsFaceElems[i][14]}); - // A right eye: - // Approximating the lower eye contour by half-ellipse (using eye points) and storing in vctRightEye: - cntRightEye = getEyeEllipse(vctPtsFaceElems[i][2], vctPtsFaceElems[i][3]); - // Pushing the right eyebrow clock-wise: - cntRightEye.insert(cntRightEye.end(), {vctPtsFaceElems[i][15], vctPtsFaceElems[i][16], - vctPtsFaceElems[i][17]}); - // A nose: - // Storing the nose points clock-wise - cntNose.clear(); - cntNose.insert(cntNose.end(), {vctPtsFaceElems[i][4], vctPtsFaceElems[i][7], - vctPtsFaceElems[i][5], vctPtsFaceElems[i][6]}); - // A mouth: - // Approximating the mouth contour by two half-ellipses (using mouth points) and storing in vctMouth: - cntMouth = getPatchedEllipse(vctPtsFaceElems[i][8], vctPtsFaceElems[i][9], - vctPtsFaceElems[i][10], vctPtsFaceElems[i][11]); - // Storing all the elements in a vector: - vctElemsContours.insert(vctElemsContours.end(), {cntLeftEye, cntRightEye, cntNose, cntMouth}); - // The face contour: - // Approximating the forehead contour by half-ellipse (using jaw points) and storing in vctFace: - cntFace = getForeheadEllipse(vctCntJaw[i][0], vctCntJaw[i][16], vctCntJaw[i][8]); - // The ellipse is drawn clock-wise, but jaw contour points goes vice versa, so it's necessary to push - // cntJaw from the end to the begin using a reverse iterator: - std::copy(vctCntJaw[i].crbegin(), vctCntJaw[i].crend(), std::back_inserter(cntFace)); - // Storing the face contour in another vector: - vctFaceContours.push_back(cntFace); - } - } -}; -``` + +.. 
code-block:: cpp + + GAPI_OCV_KERNEL(GCPUGetContours, GGetContours) + { + static void run(const std::vector &vctPtsFaceElems, // 18 landmarks of the facial elements + const std::vector &vctCntJaw, // 17 landmarks of a jaw + std::vector &vctElemsContours, + std::vector &vctFaceContours) + { + size_t numFaces = vctCntJaw.size(); + CV_Assert(numFaces == vctPtsFaceElems.size()); + CV_Assert(vctElemsContours.size() == 0ul); + CV_Assert(vctFaceContours.size() == 0ul); + // vctFaceElemsContours will store all the face elements' contours found + // in an input image, namely 4 elements (two eyes, nose, mouth) for every detected face: + vctElemsContours.reserve(numFaces \* 4); + // vctFaceElemsContours will store all the faces' contours found in an input image: + vctFaceContours.reserve(numFaces); + Contour cntFace, cntLeftEye, cntRightEye, cntNose, cntMouth; + cntNose.reserve(4); + for (size_t i = 0ul; i < numFaces; i++) + { + // The face elements contours + // A left eye: + // Approximating the lower eye contour by half-ellipse (using eye points) and storing in cntLeftEye: + cntLeftEye = getEyeEllipse(vctPtsFaceElems[i][1], vctPtsFaceElems[i][0]); + // Pushing the left eyebrow clock-wise: + cntLeftEye.insert(cntLeftEye.end(), {vctPtsFaceElems[i][12], vctPtsFaceElems[i][13], + vctPtsFaceElems[i][14]}); + // A right eye: + // Approximating the lower eye contour by half-ellipse (using eye points) and storing in vctRightEye: + cntRightEye = getEyeEllipse(vctPtsFaceElems[i][2], vctPtsFaceElems[i][3]); + // Pushing the right eyebrow clock-wise: + cntRightEye.insert(cntRightEye.end(), {vctPtsFaceElems[i][15], vctPtsFaceElems[i][16], + vctPtsFaceElems[i][17]}); + // A nose: + // Storing the nose points clock-wise + cntNose.clear(); + cntNose.insert(cntNose.end(), {vctPtsFaceElems[i][4], vctPtsFaceElems[i][7], + vctPtsFaceElems[i][5], vctPtsFaceElems[i][6]}); + // A mouth: + // Approximating the mouth contour by two half-ellipses (using mouth points) and storing in vctMouth: + cntMouth = getPatchedEllipse(vctPtsFaceElems[i][8], vctPtsFaceElems[i][9], + vctPtsFaceElems[i][10], vctPtsFaceElems[i][11]); + // Storing all the elements in a vector: + vctElemsContours.insert(vctElemsContours.end(), {cntLeftEye, cntRightEye, cntNose, cntMouth}); + // The face contour: + // Approximating the forehead contour by half-ellipse (using jaw points) and storing in vctFace: + cntFace = getForeheadEllipse(vctCntJaw[i][0], vctCntJaw[i][16], vctCntJaw[i][8]); + // The ellipse is drawn clock-wise, but jaw contour points goes vice versa, so it's necessary to push + // cntJaw from the end to the begin using a reverse iterator: + std::copy(vctCntJaw[i].crbegin(), vctCntJaw[i].crend(), std::back_inserter(cntFace)); + // Storing the face contour in another vector: + vctFaceContours.push_back(cntFace); + } + } + }; + + The kernel takes two arrays of denormalized landmarks coordinates and returns an array of elements' closed contours and an array of faces' closed contours; in other words, outputs are, the first, an array of contours of image areas to be sharpened and, the second, another one to be smoothed. -Here and below `Contour` is a vector of points. +Here and below ``Contour`` is a vector of points. 
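+
+For reference, a minimal sketch of this type and of how such closed contours can be rasterized into a mask is shown below. The ``Contour`` alias simply follows the description above; the ``drawContoursMask()`` helper, its name, and the includes are purely illustrative assumptions (the sample presumably does the equivalent work inside its custom ``GFillPolyGContours`` operation used in the pipeline):
+
+.. code-block:: cpp
+
+   #include <opencv2/imgproc.hpp>
+   #include <vector>
+
+   using Contour = std::vector<cv::Point>;
+
+   // Hypothetical helper: fill a set of closed contours into a single-channel mask.
+   inline cv::Mat drawContoursMask(const cv::Size &size, const std::vector<Contour> &contours)
+   {
+       cv::Mat mask = cv::Mat::zeros(size, CV_8UC1);
+       cv::fillPoly(mask, contours, cv::Scalar(255));
+       return mask;
+   }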
+ +Get an Eye Contour +------------------ -#### Get an Eye Contour Eye contours are estimated with the following function: -```cpp -inline int custom::getLineInclinationAngleDegrees(const cv::Point &ptLeft, const cv::Point &ptRight) -{ - const cv::Point residual = ptRight - ptLeft; - if (residual.y == 0 && residual.x == 0) - return 0; - else - return toIntRounded(atan2(toDouble(residual.y), toDouble(residual.x)) * 180.0 / CV_PI); -} -inline Contour custom::getEyeEllipse(const cv::Point &ptLeft, const cv::Point &ptRight) -{ - Contour cntEyeBottom; - const cv::Point ptEyeCenter((ptRight + ptLeft) / 2); - const int angle = getLineInclinationAngleDegrees(ptLeft, ptRight); - const int axisX = toIntRounded(cv::norm(ptRight - ptLeft) / 2.0); - // According to research, in average a Y axis of an eye is approximately - // 1/3 of an X one. - const int axisY = axisX / 3; - // We need the lower part of an ellipse: - static constexpr int kAngEyeStart = 0; - static constexpr int kAngEyeEnd = 180; - cv::ellipse2Poly(ptEyeCenter, cv::Size(axisX, axisY), angle, kAngEyeStart, kAngEyeEnd, config::kAngDelta, - cntEyeBottom); - return cntEyeBottom; -} -``` -Briefly, this function restores the bottom side of an eye by a half-ellipse based on two points in left and right eye corners. In fact, `cv::ellipse2Poly()` is used to approximate the eye region, and the function only defines ellipse parameters based on just two points: -- The ellipse center and the \f$X\f$ half-axis calculated by two eye Points. -- The \f$Y\f$ half-axis calculated according to the assumption that an average eye width is \f$1/3\f$ of its length. -- The start and the end angles which are 0 and 180 (refer to `cv::ellipse()` documentation). + +.. code-block:: cpp + + inline int custom::getLineInclinationAngleDegrees(const cv::Point &ptLeft, const cv::Point &ptRight) + { + const cv::Point residual = ptRight - ptLeft; + if (residual.y == 0 && residual.x == 0) + return 0; + else + return toIntRounded(atan2(toDouble(residual.y), toDouble(residual.x)) \* 180.0 / CV_PI); + } + inline Contour custom::getEyeEllipse(const cv::Point &ptLeft, const cv::Point &ptRight) + { + Contour cntEyeBottom; + const cv::Point ptEyeCenter((ptRight + ptLeft) / 2); + const int angle = getLineInclinationAngleDegrees(ptLeft, ptRight); + const int axisX = toIntRounded(cv::norm(ptRight - ptLeft) / 2.0); + // According to research, in average a Y axis of an eye is approximately + // 1/3 of an X one. + const int axisY = axisX / 3; + // We need the lower part of an ellipse: + static constexpr int kAngEyeStart = 0; + static constexpr int kAngEyeEnd = 180; + cv::ellipse2Poly(ptEyeCenter, cv::Size(axisX, axisY), angle, kAngEyeStart, kAngEyeEnd, config::kAngDelta, + cntEyeBottom); + return cntEyeBottom; + } + +Briefly, this function restores the bottom side of an eye by a half-ellipse based on two points in left and right eye corners. In fact, ``cv::ellipse2Poly()`` is used to approximate the eye region, and the function only defines ellipse parameters based on just two points: + +- The ellipse center and the :math:`X` half-axis calculated by two eye Points. +- The :math:`Y` half-axis calculated according to the assumption that an average eye width is :math:`1/3` of its length. +- The start and the end angles which are 0 and 180 (refer to ``cv::ellipse()`` documentation). - The angle delta: how much points to produce in the contour. - The inclination angle of the axes. 
-The use of the `atan2()` instead of just `atan()` in function `custom::getLineInclinationAngleDegrees()` is essential as it allows to return a negative value depending on the `x` and the `y` signs so we can get the right angle even in case of upside-down face arrangement (if we put the points in the right order, of course). +The use of the ``atan2()`` instead of just ``atan()`` in function ``custom::getLineInclinationAngleDegrees()`` is essential as it allows to return a negative value depending on the ``x`` and the ``y`` signs so we can get the right angle even in case of upside-down face arrangement (if we put the points in the right order, of course). + +Get a Forehead Contour +---------------------- -#### Get a Forehead Contour The function approximates the forehead contour: -```cpp -inline Contour custom::getForeheadEllipse(const cv::Point &ptJawLeft, - const cv::Point &ptJawRight, - const cv::Point &ptJawLower) -{ - Contour cntForehead; - // The point amid the top two points of a jaw: - const cv::Point ptFaceCenter((ptJawLeft + ptJawRight) / 2); - // This will be the center of the ellipse. - // The angle between the jaw and the vertical: - const int angFace = getLineInclinationAngleDegrees(ptJawLeft, ptJawRight); - // This will be the inclination of the ellipse - // Counting the half-axis of the ellipse: - const double jawWidth = cv::norm(ptJawLeft - ptJawRight); - // A forehead width equals the jaw width, and we need a half-axis: - const int axisX = toIntRounded(jawWidth / 2.0); - const double jawHeight = cv::norm(ptFaceCenter - ptJawLower); - // According to research, in average a forehead is approximately 2/3 of - // a jaw: - const int axisY = toIntRounded(jawHeight * 2 / 3.0); - // We need the upper part of an ellipse: - static constexpr int kAngForeheadStart = 180; - static constexpr int kAngForeheadEnd = 360; - cv::ellipse2Poly(ptFaceCenter, cv::Size(axisX, axisY), angFace, kAngForeheadStart, kAngForeheadEnd, - config::kAngDelta, cntForehead); - return cntForehead; -} -``` -As we have only jaw points in our detected landmarks, we have to get a half-ellipse based on three points of a jaw: the leftmost, the rightmost and the lowest one. The jaw width is assumed to be equal to the forehead width and the latter is calculated using the left and the right points. Speaking of the \f$Y\f$ axis, we have no points to get it directly, and instead assume that the forehead height is about \f$2/3\f$ of the jaw height, which can be figured out from the face center (the middle between the left and right points) and the lowest jaw point. - -### Draw Masks + +.. code-block:: cpp + + inline Contour custom::getForeheadEllipse(const cv::Point &ptJawLeft, + const cv::Point &ptJawRight, + const cv::Point &ptJawLower) + { + Contour cntForehead; + // The point amid the top two points of a jaw: + const cv::Point ptFaceCenter((ptJawLeft + ptJawRight) / 2); + // This will be the center of the ellipse. 
+ // The angle between the jaw and the vertical: + const int angFace = getLineInclinationAngleDegrees(ptJawLeft, ptJawRight); + // This will be the inclination of the ellipse + // Counting the half-axis of the ellipse: + const double jawWidth = cv::norm(ptJawLeft - ptJawRight); + // A forehead width equals the jaw width, and we need a half-axis: + const int axisX = toIntRounded(jawWidth / 2.0); + const double jawHeight = cv::norm(ptFaceCenter - ptJawLower); + // According to research, in average a forehead is approximately 2/3 of + // a jaw: + const int axisY = toIntRounded(jawHeight \* 2 / 3.0); + // We need the upper part of an ellipse: + static constexpr int kAngForeheadStart = 180; + static constexpr int kAngForeheadEnd = 360; + cv::ellipse2Poly(ptFaceCenter, cv::Size(axisX, axisY), angFace, kAngForeheadStart, kAngForeheadEnd, + config::kAngDelta, cntForehead); + return cntForehead; + } + + +As we have only jaw points in our detected landmarks, we have to get a half-ellipse based on three points of a jaw: the leftmost, the rightmost and the lowest one. The jaw width is assumed to be equal to the forehead width and the latter is calculated using the left and the right points. Speaking of the :math:`Y` axis, we have no points to get it directly, and instead assume that the forehead height is about :math:`2/3` of the jaw height, which can be figured out from the face center (the middle between the left and right points) and the lowest jaw point. + +Draw Masks +++++++++++ + When we have all the contours needed, you are able to draw masks: -```cpp -cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, garElsConts); // | -cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, config::kGKernelSize, // | - config::kGSigma); // | -cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, garFaceConts); // | -cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, config::kGKernelSize, // | - config::kGSigma); // |draw masks -// The first argument in mask() is Blur as we want to subtract from // | -// BlurG the next step: // | -cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, mskSharpG); // | -cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; // | -cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, cv::THRESH_BINARY); // | -cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); // | -``` +.. 
code-block:: cpp + + cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, garElsConts); // | + cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, config::kGKernelSize, // | + config::kGSigma); // | + cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, garFaceConts); // | + cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, config::kGKernelSize, // | + config::kGSigma); // |draw masks + // The first argument in mask() is Blur as we want to subtract from // | + // BlurG the next step: // | + cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, mskSharpG); // | + cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; // | + cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, cv::THRESH_BINARY); // | + cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); // | + The steps to get the masks are: + * the "sharp" mask calculation: - * fill the contours that should be sharpened; - * blur that to get the "sharp" mask (`mskSharpG`); + + * fill the contours that should be sharpened; + * blur that to get the "sharp" mask (``mskSharpG``); * the "bilateral" mask calculation: - * fill all the face contours fully; - * blur that; - * subtract areas which intersect with the "sharp" mask --- and get the "bilateral" mask (`mskBlurFinal`); + + * fill all the face contours fully; + * blur that; + * subtract areas which intersect with the "sharp" mask --- and get the "bilateral" mask (``mskBlurFinal``); * the background mask calculation: - * add two previous masks - * set all non-zero pixels of the result as 255 (by `cv::gapi::threshold()`) - * revert the output (by `cv::gapi::bitwise_not`) to get the background mask (`mskNoFaces`). + + * add two previous masks + * set all non-zero pixels of the result as 255 (by ``cv::gapi::threshold()``) + * revert the output (by ``cv::gapi::bitwise_not``) to get the background mask (``mskNoFaces``). + +Configuring and Running the Pipeline +#################################### -## Configuring and Running the Pipeline Once the graph is fully expressed, we can finally compile it and run on real data. G-API graph compilation is the stage where the G-API framework actually understands which kernels and networks to use. This configuration happens via G-API compilation arguments. -### DNN Parameters +DNN Parameters +++++++++++++++ + This sample is using OpenVINO™ Toolkit OpenVINO Runtime backend for DL inference, which is configured the following way: -```cpp -auto faceParams = cv::gapi::ie::Params -{ - /*std::string*/ faceXmlPath, - /*std::string*/ faceBinPath, - /*std::string*/ faceDevice -}; -auto landmParams = cv::gapi::ie::Params -{ - /*std::string*/ landmXmlPath, - /*std::string*/ landmBinPath, - /*std::string*/ landmDevice -}; -``` -Every `cv::gapi::ie::Params<>` object is related to the network specified in its template argument. We should pass there the network type we have defined in `G_API_NET()` in the early beginning of the tutorial. - -Network parameters are then wrapped in `cv::gapi::NetworkPackage`: -```cpp -auto networks = cv::gapi::networks(faceParams, landmParams); -``` - -More details in "Face Analytics Pipeline" ([Configuring the Pipeline](@ref gapi_ifd_configuration) section). - -### Kernel Packages + +.. 
code-block:: cpp + + auto faceParams = cv::gapi::ie::Params + { + /\*std::string\*/ faceXmlPath, + /\*std::string\*/ faceBinPath, + /\*std::string\*/ faceDevice + }; + auto landmParams = cv::gapi::ie::Params + { + /\*std::string\*/ landmXmlPath, + /\*std::string\*/ landmBinPath, + /\*std::string\*/ landmDevice + }; + +Every ``cv::gapi::ie::Params<>`` object is related to the network specified in its template argument. We should pass there the network type we have defined in ``G_API_NET()`` in the early beginning of the tutorial. + +Network parameters are then wrapped in ``cv::gapi::NetworkPackage``: + +.. code-block:: cpp + + auto networks = cv::gapi::networks(faceParams, landmParams); + + +More details in "Face Analytics Pipeline" (:ref:`Configuring the Pipeline ` section). + +Kernel Packages ++++++++++++++++ + In this example we use a lot of custom kernels, in addition to that we use Fluid backend to optimize out memory for G-API's standard kernels where applicable. The resulting kernel package is formed like this: -```cpp -auto customKernels = cv::gapi::kernels(); -auto kernels = cv::gapi::combine(cv::gapi::core::fluid::kernels(), - customKernels); -``` - -### Compiling the Streaming Pipeline + +.. code-block:: cpp + + auto customKernels = cv::gapi::kernels(); + auto kernels = cv::gapi::combine(cv::gapi::core::fluid::kernels(), + customKernels); + + +Compiling the Streaming Pipeline +++++++++++++++++++++++++++++++++ + G-API optimizes execution for video streams when compiled in the "Streaming" mode. -```cpp -cv::GStreamingCompiled stream = pipeline.compileStreaming(cv::compile_args(kernels, networks)); -``` -More on this in "Face Analytics Pipeline" ([Configuring the pipeline](@ref gapi_ifd_configuration) section). - -### Running the streaming pipeline -In order to run the G-API streaming pipeline, all we need is to specify the input video source, call `cv::GStreamingCompiled::start()`, and then fetch the pipeline processing results: -```cpp -if (parser.has("input")) -{ - stream.setSource(cv::gapi::wip::make_src(parser.get("input"))); -} - auto out_vector = cv::gout(imgBeautif, imgShow, vctFaceConts, - vctElsConts, vctRects); - stream.start(); - avg.start(); - while (stream.running()) - { - if (!stream.try_pull(std::move(out_vector))) - { - // Use a try_pull() to obtain data. - // If there's no data, let UI refresh (and handle keypress) - if (cv::waitKey(1) >= 0) break; - else continue; - } - frames++; - // Drawing face boxes and landmarks if necessary: - if (flgLandmarks == true) - { - cv::polylines(imgShow, vctFaceConts, config::kClosedLine, - config::kClrYellow); - cv::polylines(imgShow, vctElsConts, config::kClosedLine, - config::kClrYellow); - } - if (flgBoxes == true) - for (auto rect : vctRects) - cv::rectangle(imgShow, rect, config::kClrGreen); - cv::imshow(config::kWinInput, imgShow); - cv::imshow(config::kWinFaceBeautification, imgBeautif); - } -``` +.. code-block:: cpp + + cv::GStreamingCompiled stream = pipeline.compileStreaming(cv::compile_args(kernels, networks)); + +More on this in "Face Analytics Pipeline" (:ref:`Configuring the Pipeline ` section). + +Running the streaming pipeline +++++++++++++++++++++++++++++++ + + +In order to run the G-API streaming pipeline, all we need is to specify the input video source, call ``cv::GStreamingCompiled::start()``, and then fetch the pipeline processing results: + +.. 
code-block:: cpp + + if (parser.has("input")) + { + stream.setSource(cv::gapi::wip::make_src(parser.get("input"))); + } + auto out_vector = cv::gout(imgBeautif, imgShow, vctFaceConts, + vctElsConts, vctRects); + stream.start(); + avg.start(); + while (stream.running()) + { + if (!stream.try_pull(std::move(out_vector))) + { + // Use a try_pull() to obtain data. + // If there's no data, let UI refresh (and handle keypress) + if (cv::waitKey(1) >= 0) break; + else continue; + } + frames++; + // Drawing face boxes and landmarks if necessary: + if (flgLandmarks == true) + { + cv::polylines(imgShow, vctFaceConts, config::kClosedLine, + config::kClrYellow); + cv::polylines(imgShow, vctElsConts, config::kClosedLine, + config::kClrYellow); + } + if (flgBoxes == true) + for (auto rect : vctRects) + cv::rectangle(imgShow, rect, config::kClrGreen); + cv::imshow(config::kWinInput, imgShow); + cv::imshow(config::kWinFaceBeautification, imgBeautif); + } + + Once results are ready and can be pulled from the pipeline we display it on the screen and handle GUI events. -See [Running the pipeline](@ref gapi_ifd_running) section in the "Face Analytics Pipeline" tutorial for more details. +See :ref:`Running the pipeline ` section in the "Face Analytics Pipeline" tutorial for more details. + +Conclusion +########## -## Conclusion The tutorial has two goals: to show the use of brand new features of G-API introduced in OpenCV 4.2, and give a basic understanding on a sample face beautification algorithm. The result of the algorithm application: -![Face Beautification example](../img/gapi_face_beautification_example.jpg) +.. image:: _static/images/gapi_face_beautification_example.jpg + +On the test machine (Intel® Core™ i7-8700) the G-API-optimized video pipeline outperforms its serial (non-pipelined) version by a factor of 2.7 – meaning that for such a non-trivial graph, the proper pipelining can bring almost 3x increase in performance. + +@endsphinxdirective -On the test machine (Intel® Core™ i7-8700) the G-API-optimized video pipeline outperforms its serial (non-pipelined) version by a factor of 2.7 – meaning that for such a non-trivial graph, the proper pipelining can bring almost 3x increase in performance. \ No newline at end of file diff --git a/docs/gapi/gapi_face_analytics_pipeline.md b/docs/gapi/gapi_face_analytics_pipeline.md index 781161df8ba281..8ea422f7800bfc 100644 --- a/docs/gapi/gapi_face_analytics_pipeline.md +++ b/docs/gapi/gapi_face_analytics_pipeline.md @@ -1,152 +1,177 @@ # Building a Face Analytics Pipeline {#openvino_docs_gapi_gapi_face_analytics_pipeline} -## Overview +@sphinxdirective + +Overview +######## + In this tutorial you will learn: * How to integrate Deep Learning inference in a G-API graph. * How to run a G-API graph on a video stream and obtain data from it. 
-## Prerequisites
+Prerequisites
+#############
+
 This sample requires:
 * PC with GNU/Linux or Microsoft Windows (Apple macOS is supported but was not tested)
-* OpenCV 4.2 or higher built with [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) (building with [Intel® TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) is a plus)
-* The following pre-trained models from the [Open Model Zoo](@ref omz_models_group_intel):
-  * [face-detection-adas-0001](@ref omz_models_model_face_detection_adas_0001)
-  * [age-gender-recognition-retail-0013](@ref omz_models_model_age_gender_recognition_retail_0013)
-  * [emotions-recognition-retail-0003](@ref omz_models_model_emotions_recognition_retail_0003)
+* OpenCV 4.2 or higher built with `Intel® Distribution of OpenVINO™ Toolkit <https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html>`__ (building with `Intel® TBB <https://www.threadingbuildingblocks.org/intel-tbb-tutorial>`__ is a plus)
+* The following pre-trained models from the :doc:`Open Model Zoo <omz_models_group_intel>`
+
+  * `face-detection-adas-0001 `__
+  * `age-gender-recognition-retail-0013 `__
+  * `emotions-recognition-retail-0003 `__
+
+To download the models from the Open Model Zoo, use the :doc:`Model Downloader <omz_tools_downloader>` tool.
-To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader) tool.
+Introduction: Why G-API
+#######################
-## Introduction: Why G-API
 Many computer vision algorithms run on a video stream rather than on individual images. Stream processing usually consists of multiple steps – like decode, preprocessing, detection, tracking, classification (on detected objects), and visualization – forming a *video processing pipeline*. Moreover, many of these steps of such pipeline can run in parallel – modern platforms have different hardware blocks on the same chip like decoders and GPUs, and extra accelerators can be plugged in as extensions for deep learning offload. Given all this manifold of options and a variety in video analytics algorithms, managing such pipelines effectively quickly becomes a problem. For sure it can be done manually, but this approach doesn't scale: if a change is required in the algorithm (e.g. a new pipeline step is added), or if it is ported on a new platform with different capabilities, the whole pipeline needs to be re-optimized. Starting with version 4.2, OpenCV offers a solution to this problem. OpenCV G-API now can manage Deep Learning inference (a cornerstone of any modern analytics pipeline) with traditional Computer Vision as well as video capturing/decoding, all in a single pipeline. G-API takes care of pipelining itself – so if the algorithm or platform changes, the execution model adapts to it automatically.
-## Pipeline Overview
-Our sample application is based on [Interactive Face Detection](@ref omz_demos_interactive_face_detection_demo_cpp) demo from Open Model Zoo. A simplified pipeline consists of the following steps:
+Pipeline Overview
+#################
+
+Our sample application is based on `Interactive Face Detection `__ demo from Open Model Zoo. A simplified pipeline consists of the following steps:
 1. Image acquisition and decode
 2. Detection with preprocessing
 3. Classification with preprocessing for every detected object with two networks
 4. Visualization
-![Face Analytics Pipeline Overview](../img/gapi_face_analytics_pipeline.png)
+.. image:: _static/images/gapi_face_analytics_pipeline.png
+
+..
_gapi_ifd_constructing: + +Construct a pipeline +#################### + +Constructing a G-API graph for a video streaming case does not differ much from a `regular usage `__ of G-API -- it is still about defining graph *data* (with cv::GMat, ``cv::GScalar``, and ``cv::GArray``) and *operations* over it. Inference also becomes an operation in the graph, but is defined in a little bit different way. -## Construct a pipeline {#gapi_ifd_constructing} +.. _gapi_ifd_declaring_nets: -Constructing a G-API graph for a video streaming case does not differ much from a [regular usage](https://docs.opencv.org/4.5.0/d0/d1e/gapi.html#gapi_example) of G-API -- it is still about defining graph *data* (with cv::GMat, `cv::GScalar`, and `cv::GArray`) and *operations* over it. Inference also becomes an operation in the graph, but is defined in a little bit different way. +Declare Deep Learning topologies +++++++++++++++++++++++++++++++++ -### Declare Deep Learning topologies {#gapi_ifd_declaring_nets} +In contrast with traditional CV functions (see `core `__ and `imgproc `__) where G-API declares distinct operations for every function, inference in G-API is a single generic operation ``cv::gapi::infer<>``. As usual, it is just an interface and it can be implemented in a number of ways under the hood. In OpenCV 4.2, only OpenVINO™ Runtime-based backend is available, and OpenCV's own DNN module-based backend is to come. -In contrast with traditional CV functions (see [core](https://docs.opencv.org/4.5.0/df/d1f/group__gapi__core.html) and [imgproc](https://docs.opencv.org/4.5.0/d2/d00/group__gapi__imgproc.html)) where G-API declares distinct operations for every function, inference in G-API is a single generic operation `cv::gapi::infer<>`. As usual, it is just an interface and it can be implemented in a number of ways under the hood. In OpenCV 4.2, only OpenVINO™ Runtime-based backend is available, and OpenCV's own DNN module-based backend is to come. +The ``cv::gapi::infer<>`` is _parametrized_ by the details of a topology we are going to execute. Like operations, topologies in G-API are strongly typed and are defined with a special macro ``G_API_NET()``: -`cv::gapi::infer<>` is _parametrized_ by the details of a topology we are going to execute. Like operations, topologies in G-API are strongly typed and are defined with a special macro `G_API_NET()`: +.. code-block:: cpp + + // Face detector: takes one Mat, returns another Mat + G_API_NET(Faces, , "face-detector"); + // Age/Gender recognition - takes one Mat, returns two: + // one for Age and one for Gender. In G-API, multiple-return-value operations + // are defined using std::tuple<>. + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "age-gender-recoginition"); + // Emotion recognition - takes one Mat, returns another. + G_API_NET(Emotions, , "emotions-recognition"); -```cpp -// Face detector: takes one Mat, returns another Mat -G_API_NET(Faces, , "face-detector"); -// Age/Gender recognition - takes one Mat, returns two: -// one for Age and one for Gender. In G-API, multiple-return-value operations -// are defined using std::tuple<>. -using AGInfo = std::tuple; -G_API_NET(AgeGender, , "age-gender-recoginition"); -// Emotion recognition - takes one Mat, returns another. -G_API_NET(Emotions, , "emotions-recognition"); -``` +Similar to how operations are defined with ``G_API_OP()``, network description requires three parameters: -Similar to how operations are defined with `G_API_OP()`, network description requires three parameters: 1. A type name. 
Every defined topology is declared as a distinct C++ type which is used further in the program -- see below. -2. A `std::function<>`-like API signature. G-API traits networks as regular "functions" which take and return data. Here network `Faces` (a detector) takes a `cv::GMat` and returns a `cv::GMat`, while network `AgeGender` is known to provide two outputs (age and gender blobs, respectively) -- so its has a `std::tuple<>` as a return type. +2. A ``std::function<>``-like API signature. G-API traits networks as regular "functions" which take and return data. Here network ``Faces`` (a detector) takes a ``cv::GMat`` and returns a ``cv::GMat``, while network ``AgeGender`` is known to provide two outputs (age and gender blobs, respectively) -- so its has a ``std::tuple<>`` as a return type. 3. A topology name -- can be any non-empty string, G-API is using these names to distinguish networks inside. Names should be unique in the scope of a single graph. -## Building a GComputation {#gapi_ifd_gcomputation} +.. _gapi_ifd_gcomputation: + +Building a GComputation +####################### Now the above pipeline is expressed in G-API like this: -```cpp -cv::GComputation pp([]() { - // Declare an empty GMat - the beginning of the pipeline. - cv::GMat in; - // Run face detection on the input frame. Result is a single GMat, - // internally representing an 1x1x200x7 SSD output. - // This is a single-patch version of infer: - // - Inference is running on the whole input image; - // - Image is converted and resized to the network's expected format - // automatically. - cv::GMat detections = cv::gapi::infer(in); - // Parse SSD output to a list of ROI (rectangles) using - // a custom kernel. Note: parsing SSD may become a "standard" kernel. - cv::GArray faces = custom::PostProc::on(detections, in); - // Now run Age/Gender model on every detected face. This model has two - // outputs (for age and gender respectively). - // A special ROI-list-oriented form of infer<>() is used here: - // - First input argument is the list of rectangles to process, - // - Second one is the image where to take ROI from; - // - Crop/Resize/Layout conversion happens automatically for every image patch - // from the list - // - Inference results are also returned in form of list (GArray<>) - // - Since there're two outputs, infer<> return two arrays (via std::tuple). - cv::GArray ages; - cv::GArray genders; - std::tie(ages, genders) = cv::gapi::infer(faces, in); - // Recognize emotions on every face. - // ROI-list-oriented infer<>() is used here as well. - // Since custom::Emotions network produce a single output, only one - // GArray<> is returned here. - cv::GArray emotions = cv::gapi::infer(faces, in); - // Return the decoded frame as a result as well. - // Input matrix can't be specified as output one, so use copy() here - // (this copy will be optimized out in the future). - cv::GMat frame = cv::gapi::copy(in); - // Now specify the computation's boundaries - our pipeline consumes - // one images and produces five outputs. - return cv::GComputation(cv::GIn(in), - cv::GOut(frame, faces, ages, genders, emotions)); -}); -``` - -Every pipeline starts with declaring empty data objects – which act as inputs to the pipeline. Then we call a generic `cv::gapi::infer<>` specialized to Faces detection network. `cv::gapi::infer<>` inherits its signature from its template parameter – and in this case it expects one input cv::GMat and produces one output cv::GMat. 
- -In this sample we use a pre-trained SSD-based network and its output needs to be parsed to an array of detections (object regions of interest, ROIs). It is done by a custom operation custom::PostProc, which returns an array of rectangles (of type `cv::GArray`) back to the pipeline. This operation also filters out results by a confidence threshold – and these details are hidden in the kernel itself. Still, at the moment of graph construction we operate with interfaces only and don't need actual kernels to express the pipeline – so the implementation of this post-processing will be listed later. - -After detection result output is parsed to an array of objects, we can run classification on any of those. G-API doesn't support syntax for in-graph loops like `for_each()` yet, but instead `cv::gapi::infer<>` comes with a special list-oriented overload. - -User can call `cv::gapi::infer<>` with a `cv::GArray` as the first argument, so then G-API assumes it needs to run the associated network on every rectangle from the given list of the given frame (second argument). Result of such operation is also a list – a cv::GArray of `cv::GMat`. - -Since AgeGender network itself produces two outputs, it's output type for a list-based version of `cv::gapi::infer` is a tuple of arrays. We use `std::tie()` to decompose this input into two distinct objects. - -Emotions network produces a single output so its list-based inference's return type is `cv::GArray`. - -## Configure the Pipeline {#gapi_ifd_configuration} +.. code-block:: cpp + + cv::GComputation pp([]() { + // Declare an empty GMat - the beginning of the pipeline. + cv::GMat in; + // Run face detection on the input frame. Result is a single GMat, + // internally representing an 1x1x200x7 SSD output. + // This is a single-patch version of infer: + // - Inference is running on the whole input image; + // - Image is converted and resized to the network's expected format + // automatically. + cv::GMat detections = cv::gapi::infer(in); + // Parse SSD output to a list of ROI (rectangles) using + // a custom kernel. Note: parsing SSD may become a "standard" kernel. + cv::GArray faces = custom::PostProc::on(detections, in); + // Now run Age/Gender model on every detected face. This model has two + // outputs (for age and gender respectively). + // A special ROI-list-oriented form of infer<>() is used here: + // - First input argument is the list of rectangles to process, + // - Second one is the image where to take ROI from; + // - Crop/Resize/Layout conversion happens automatically for every image patch + // from the list + // - Inference results are also returned in form of list (GArray<>) + // - Since there're two outputs, infer<> return two arrays (via std::tuple). + cv::GArray ages; + cv::GArray genders; + std::tie(ages, genders) = cv::gapi::infer(faces, in); + // Recognize emotions on every face. + // ROI-list-oriented infer<>() is used here as well. + // Since custom::Emotions network produce a single output, only one + // GArray<> is returned here. + cv::GArray emotions = cv::gapi::infer(faces, in); + // Return the decoded frame as a result as well. + // Input matrix can't be specified as output one, so use copy() here + // (this copy will be optimized out in the future). + cv::GMat frame = cv::gapi::copy(in); + // Now specify the computation's boundaries - our pipeline consumes + // one images and produces five outputs. 
+ return cv::GComputation(cv::GIn(in), + cv::GOut(frame, faces, ages, genders, emotions)); + }); + +Every pipeline starts with declaring empty data objects – which act as inputs to the pipeline. Then we call a generic ``cv::gapi::infer<>`` specialized to Faces detection network. ``cv::gapi::infer<>`` inherits its signature from its template parameter – and in this case it expects one input cv::GMat and produces one output cv::GMat. + +In this sample we use a pre-trained SSD-based network and its output needs to be parsed to an array of detections (object regions of interest, ROIs). It is done by a custom operation custom::PostProc, which returns an array of rectangles (of type ``cv::GArray``) back to the pipeline. This operation also filters out results by a confidence threshold – and these details are hidden in the kernel itself. Still, at the moment of graph construction we operate with interfaces only and don't need actual kernels to express the pipeline – so the implementation of this post-processing will be listed later. + +After detection result output is parsed to an array of objects, we can run classification on any of those. G-API doesn't support syntax for in-graph loops like ``for_each()`` yet, but instead ``cv::gapi::infer<>`` comes with a special list-oriented overload. + +User can call ``cv::gapi::infer<>`` with a ``cv::GArray`` as the first argument, so then G-API assumes it needs to run the associated network on every rectangle from the given list of the given frame (second argument). Result of such operation is also a list – a cv::GArray of ``cv::GMat``. + +Since AgeGender network itself produces two outputs, it's output type for a list-based version of ``cv::gapi::infer`` is a tuple of arrays. We use ``std::tie()`` to decompose this input into two distinct objects. + +Emotions network produces a single output so its list-based inference's return type is ``cv::GArray``. + +.. _gapi_ifd_configuration: + +Configure the Pipeline +###################### G-API strictly separates construction from configuration -- with the idea to keep algorithm code itself platform-neutral. In the above listings we only declared our operations and expressed the overall data flow, but didn't even mention that we use OpenVINO™. We only described *what* we do, but not *how* we do it. Keeping these two aspects clearly separated is the design goal for G-API. Platform-specific details arise when the pipeline is *compiled* -- i.e. is turned from a declarative to an executable form. The way *how* to run stuff is specified via compilation arguments, and new inference/streaming features are no exception from this rule. -G-API is built on backends which implement interfaces (see [Architecture](https://docs.opencv.org/4.5.0/de/d4d/gapi_hld.html) and [Kernels](kernel_api.md) for details) -- thus `cv::gapi::infer<>` is a function which can be implemented by different backends. In OpenCV 4.2, only OpenVINO™ Runtime backend for inference is available. 
Every inference backend in G-API has to provide a special parameterizable structure to express *backend-specific* neural network parameters -- and in this case, it is `cv::gapi::ie::Params`: - -```cpp -auto det_net = cv::gapi::ie::Params { - cmd.get("fdm"), // read cmd args: path to topology IR - cmd.get("fdw"), // read cmd args: path to weights - cmd.get("fdd"), // read cmd args: device specifier -}; -auto age_net = cv::gapi::ie::Params { - cmd.get("agem"), // read cmd args: path to topology IR - cmd.get("agew"), // read cmd args: path to weights - cmd.get("aged"), // read cmd args: device specifier -}.cfgOutputLayers({ "age_conv3", "prob" }); -auto emo_net = cv::gapi::ie::Params { - cmd.get("emom"), // read cmd args: path to topology IR - cmd.get("emow"), // read cmd args: path to weights - cmd.get("emod"), // read cmd args: device specifier -}; -``` - -Here we define three parameter objects: `det_net`, `age_net`, and `emo_net`. Every object is a `cv::gapi::ie::Params` structure parametrization for each particular network we use. On a compilation stage, G-API automatically matches network parameters with their `cv::gapi::infer<>` calls in graph using this information. +G-API is built on backends which implement interfaces (see `Architecture `__ and :doc:`Kernels ` for details) thus ``cv::gapi::infer<>`` is a function which can be implemented by different backends. In OpenCV 4.2, only OpenVINO™ Runtime backend for inference is available. Every inference backend in G-API has to provide a special parameterizable structure to express *backend-specific* neural network parameters and in this case, it is ``cv::gapi::ie::Params``: + +.. code-block:: cpp + + auto det_net = cv::gapi::ie::Params { + cmd.get("fdm"), // read cmd args: path to topology IR + cmd.get("fdw"), // read cmd args: path to weights + cmd.get("fdd"), // read cmd args: device specifier + }; + auto age_net = cv::gapi::ie::Params { + cmd.get("agem"), // read cmd args: path to topology IR + cmd.get("agew"), // read cmd args: path to weights + cmd.get("aged"), // read cmd args: device specifier + }.cfgOutputLayers({ "age_conv3", "prob" }); + auto emo_net = cv::gapi::ie::Params { + cmd.get("emom"), // read cmd args: path to topology IR + cmd.get("emow"), // read cmd args: path to weights + cmd.get("emod"), // read cmd args: device specifier + }; + + +Here we define three parameter objects: ``det_net``, ``age_net``, and ``emo_net``. Every object is a ``cv::gapi::ie::Params`` structure parametrization for each particular network we use. On a compilation stage, G-API automatically matches network parameters with their ``cv::gapi::infer<>`` calls in graph using this information. Regardless of the topology, every parameter structure is constructed with three string arguments – specific to the OpenVINO™ Runtime: @@ -155,171 +180,188 @@ Regardless of the topology, every parameter structure is constructed with three * Device where to run – "CPU", "GPU", and others – based on your OpenVINO™ Toolkit installation. These arguments are taken from the command-line parser. Once networks are defined and custom kernels are implemented, the pipeline is compiled for streaming: -```cpp -// Form a kernel package (with a single OpenCV-based implementation of our -// post-processing) and a network package (holding our three networks). -auto kernels = cv::gapi::kernels(); -auto networks = cv::gapi::networks(det_net, age_net, emo_net); -// Compile our pipeline and pass our kernels & networks as -// parameters. 
This is the place where G-API learns which -// networks & kernels we're actually operating with (the graph -// description itself known nothing about that). -auto cc = pp.compileStreaming(cv::compile_args(kernels, networks)); -``` -`cv::GComputation::compileStreaming()` triggers a special video-oriented form of graph compilation where G-API is trying to optimize throughput. Result of this compilation is an object of special type `cv::GStreamingCompiled` – in contrast to a traditional callable `cv::GCompiled`, these objects are closer to media players in their semantics. +.. code-block:: cpp + + // Form a kernel package (with a single OpenCV-based implementation of our + // post-processing) and a network package (holding our three networks). + auto kernels = cv::gapi::kernels(); + auto networks = cv::gapi::networks(det_net, age_net, emo_net); + // Compile our pipeline and pass our kernels & networks as + // parameters. This is the place where G-API learns which + // networks & kernels we're actually operating with (the graph + // description itself knows nothing about that). + auto cc = pp.compileStreaming(cv::compile_args(kernels, networks)); + + +The ``cv::GComputation::compileStreaming()`` call triggers a special video-oriented form of graph compilation, where G-API tries to optimize throughput. The result of this compilation is an object of special type ``cv::GStreamingCompiled`` – in contrast to a traditional callable ``cv::GCompiled``, these objects are closer to media players in their semantics. -> **NOTE**: There is no need to pass metadata arguments describing the format of the input video stream in `cv::GComputation::compileStreaming()` – G-API figures automatically what are the formats of the input vector and adjusts the pipeline to these formats on-the-fly. User still can pass metadata there as with regular `cv::GComputation::compile()` in order to fix the pipeline to the specific input format. +.. note:: + There is no need to pass metadata arguments describing the format of the input video stream in ``cv::GComputation::compileStreaming()`` – G-API automatically figures out the formats of the input vector and adjusts the pipeline to these formats on-the-fly. The user can still pass metadata there, as with the regular ``cv::GComputation::compile()``, in order to fix the pipeline to a specific input format. -## Running the Pipeline {#gapi_ifd_running} +.. _gapi_ifd_running: + +Running the Pipeline +#################### Pipelining optimization is based on processing multiple input video frames simultaneously, running different steps of the pipeline in parallel. This is why it works best when the framework takes full control over the video stream. The idea behind streaming API is that user specifies an *input source* to the pipeline and then G-API manages its execution automatically until the source ends or user interrupts the execution. G-API pulls new image data from the source and passes it to the pipeline for processing. -Streaming sources are represented by the interface `cv::gapi::wip::IStreamSource`. Objects implementing this interface may be passed to `GStreamingCompiled` as regular inputs via `cv::gin()` helper function. In OpenCV 4.2, only one streaming source is allowed per pipeline -- this requirement will be relaxed in the future. +Streaming sources are represented by the interface ``cv::gapi::wip::IStreamSource``. Objects implementing this interface may be passed to ``GStreamingCompiled`` as regular inputs via ``cv::gin()`` helper function. 
In OpenCV 4.2, only one streaming source is allowed per pipeline -- this requirement will be relaxed in the future. -OpenCV comes with a great class cv::VideoCapture and by default G-API ships with a stream source class based on it -- `cv::gapi::wip::GCaptureSource`. Users can implement their own -streaming sources e.g. using [VAAPI](https://01.org/vaapi) or other Media or Networking APIs. +OpenCV comes with a great class cv::VideoCapture and by default G-API ships with a stream source class based on it -- ``cv::gapi::wip::GCaptureSource``. Users can implement their own +streaming sources e.g. using `VAAPI `__ or other Media or Networking APIs. Sample application specifies the input source as follows: -```cpp -auto in_src = cv::gapi::wip::make_src(input); -cc.setSource(cv::gin(in_src)); -``` - -Please note that a GComputation may still have multiple inputs like `cv::GMat`, `cv::GScalar`, or `cv::GArray` objects. User can pass their respective host-side types (`cv::Mat`, `cv::Scalar`, `std::vector<>`) in the input vector as well, but in Streaming mode these objects will create "endless" constant streams. Mixing a real video source stream and a const data stream is allowed. - -Running a pipeline is easy – just call `cv::GStreamingCompiled::start()` and fetch your data with blocking `cv::GStreamingCompiled::pull()` or non-blocking `cv::GStreamingCompiled::try_pull()`; repeat until the stream ends: - -```cpp -// After data source is specified, start the execution -cc.start(); -// Declare data objects we will be receiving from the pipeline. -cv::Mat frame; // The captured frame itself -std::vector faces; // Array of detected faces -std::vector out_ages; // Array of inferred ages (one blob per face) -std::vector out_genders; // Array of inferred genders (one blob per face) -std::vector out_emotions; // Array of classified emotions (one blob per face) -// Implement different execution policies depending on the display option -// for the best performance. -while (cc.running()) { - auto out_vector = cv::gout(frame, faces, out_ages, out_genders, out_emotions); - if (no_show) { - // This is purely a video processing. No need to balance - // with UI rendering. Use a blocking pull() to obtain - // data. Break the loop if the stream is over. - if (!cc.pull(std::move(out_vector))) - break; - } else if (!cc.try_pull(std::move(out_vector))) { - // Use a non-blocking try_pull() to obtain data. - // If there's no data, let UI refresh (and handle keypress) - if (cv::waitKey(1) >= 0) break; - else continue; - } - // At this point we have data for sure (obtained in either - // blocking or non-blocking way). - frames++; - labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions); - labels::DrawFPS(frame, frames, avg.fps(frames)); - if (!no_show) cv::imshow("Out", frame); -} -``` + +.. code-block:: cpp + + auto in_src = cv::gapi::wip::make_src(input); + cc.setSource(cv::gin(in_src)); + +Please note that a GComputation may still have multiple inputs like ``cv::GMat``, ``cv::GScalar``, or ``cv::GArray`` objects. User can pass their respective host-side types (``cv::Mat``, ``cv::Scalar``, ``std::vector<>``) in the input vector as well, but in Streaming mode these objects will create "endless" constant streams. Mixing a real video source stream and a const data stream is allowed. 
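+
+As an aside (this snippet is not part of the sample), a minimal sketch of mixing a real video stream with a constant input could look as follows. The two-input graph, the ``cv::gapi::addC`` operation, and the constant value are illustration-only assumptions; ``input`` stands for the video path used elsewhere in the sample:
+
+.. code-block:: cpp
+
+   // Hypothetical graph with two inputs: a streamed frame and a constant scalar.
+   cv::GMat in;
+   cv::GScalar shift;
+   cv::GMat out = cv::gapi::addC(in, shift);   // add the constant to every frame
+   cv::GComputation graph(cv::GIn(in, shift), cv::GOut(out));
+
+   auto cc  = graph.compileStreaming();
+   auto src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
+   // The first input is fed by the real video source; the second one becomes
+   // an "endless" constant stream built from the host-side cv::Scalar value.
+   cc.setSource(cv::gin(src, cv::Scalar(16)));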
+ +Running a pipeline is easy – just call ``cv::GStreamingCompiled::start()`` and fetch your data with blocking ``cv::GStreamingCompiled::pull()`` or non-blocking ``cv::GStreamingCompiled::try_pull()``; repeat until the stream ends: + +.. code-block:: cpp + + // After data source is specified, start the execution + cc.start(); + // Declare data objects we will be receiving from the pipeline. + cv::Mat frame; // The captured frame itself + std::vector faces; // Array of detected faces + std::vector out_ages; // Array of inferred ages (one blob per face) + std::vector out_genders; // Array of inferred genders (one blob per face) + std::vector out_emotions; // Array of classified emotions (one blob per face) + // Implement different execution policies depending on the display option + // for the best performance. + while (cc.running()) { + auto out_vector = cv::gout(frame, faces, out_ages, out_genders, out_emotions); + if (no_show) { + // This is purely a video processing. No need to balance + // with UI rendering. Use a blocking pull() to obtain + // data. Break the loop if the stream is over. + if (!cc.pull(std::move(out_vector))) + break; + } else if (!cc.try_pull(std::move(out_vector))) { + // Use a non-blocking try_pull() to obtain data. + // If there's no data, let UI refresh (and handle keypress) + if (cv::waitKey(1) >= 0) break; + else continue; + } + // At this point we have data for sure (obtained in either + // blocking or non-blocking way). + frames++; + labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions); + labels::DrawFPS(frame, frames, avg.fps(frames)); + if (!no_show) cv::imshow("Out", frame); + } The above code may look complex but in fact it handles two modes – with and without graphical user interface (GUI): -* When a sample is running in a "headless" mode (`--pure` option is set), this code simply pulls data from the pipeline with the blocking `pull()` until it ends. This is the most performant mode of execution. -* When results are also displayed on the screen, the Window System needs to take some time to refresh the window contents and handle GUI events. In this case, the demo pulls data with a non-blocking `try_pull()` until there is no more data available (but it does not mark end of the stream – just means new data is not ready yet), and only then displays the latest obtained result and refreshes the screen. Reducing the time spent in GUI with this trick increases the overall performance a little bit. - -## Comparison with Serial Mode -The sample can also run in a serial mode for a reference and benchmarking purposes. In this case, a regular `cv::GComputation::compile()` is used and a regular single-frame `cv::GCompiled` object is produced; the pipelining optimization is not applied within G-API; it is the user responsibility to acquire image frames from `cv::VideoCapture` object and pass those to G-API. 
- -```cpp -cv::VideoCapture cap(input); -cv::Mat in_frame, frame; // The captured frame itself -std::vector faces; // Array of detected faces -std::vector out_ages; // Array of inferred ages (one blob per face) -std::vector out_genders; // Array of inferred genders (one blob per face) -std::vector out_emotions; // Array of classified emotions (one blob per face) -while (cap.read(in_frame)) { - pp.apply(cv::gin(in_frame), - cv::gout(frame, faces, out_ages, out_genders, out_emotions), - cv::compile_args(kernels, networks)); - labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions); - frames++; - if (frames == 1u) { - // Start timer only after 1st frame processed -- compilation - // happens on-the-fly here - avg.start(); - } else { - // Measurfe & draw FPS for all other frames - labels::DrawFPS(frame, frames, avg.fps(frames-1)); - } - if (!no_show) { - cv::imshow("Out", frame); - if (cv::waitKey(1) >= 0) break; - } -} -``` - -On a test machine (Intel® Core™ i5-6600), with OpenCV built with [Intel® TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) support, detector network assigned to CPU, and classifiers to iGPU, the pipelined sample outperformes the serial one by the factor of 1.36x (thus adding +36% in overall throughput). - -## Conclusion +* When a sample is running in a "headless" mode (``--pure`` option is set), this code simply pulls data from the pipeline with the blocking ``pull()`` until it ends. This is the most performant mode of execution. +* When results are also displayed on the screen, the Window System needs to take some time to refresh the window contents and handle GUI events. In this case, the demo pulls data with a non-blocking ``try_pull()`` until there is no more data available (but it does not mark end of the stream – just means new data is not ready yet), and only then displays the latest obtained result and refreshes the screen. Reducing the time spent in GUI with this trick increases the overall performance a little bit. + +Comparison with Serial Mode +########################### + +The sample can also run in a serial mode for a reference and benchmarking purposes. In this case, a regular ``cv::GComputation::compile()`` is used and a regular single-frame ``cv::GCompiled`` object is produced; the pipelining optimization is not applied within G-API; it is the user responsibility to acquire image frames from ``cv::VideoCapture`` object and pass those to G-API. + +.. 
code-block:: cpp + + cv::VideoCapture cap(input); + cv::Mat in_frame, frame; // The captured frame itself + std::vector faces; // Array of detected faces + std::vector out_ages; // Array of inferred ages (one blob per face) + std::vector out_genders; // Array of inferred genders (one blob per face) + std::vector out_emotions; // Array of classified emotions (one blob per face) + while (cap.read(in_frame)) { + pp.apply(cv::gin(in_frame), + cv::gout(frame, faces, out_ages, out_genders, out_emotions), + cv::compile_args(kernels, networks)); + labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions); + frames++; + if (frames == 1u) { + // Start timer only after 1st frame processed -- compilation + // happens on-the-fly here + avg.start(); + } else { + // Measure & draw FPS for all other frames + labels::DrawFPS(frame, frames, avg.fps(frames-1)); + } + if (!no_show) { + cv::imshow("Out", frame); + if (cv::waitKey(1) >= 0) break; + } + } + +On a test machine (Intel® Core™ i5-6600), with OpenCV built with `Intel® TBB `__ support, detector network assigned to CPU, and classifiers to iGPU, the pipelined sample outperforms the serial one by the factor of 1.36x (thus adding +36% in overall throughput). + +Conclusion +########### + G-API introduces a technological way to build and optimize hybrid pipelines. Switching to a new execution model does not require changes in the algorithm code expressed with G-API – only the way how graph is triggered differs. -## Listing: Post-Processing Kernel -G-API gives an easy way to plug custom code into the pipeline even if it is running in a streaming mode and processing tensor data. Inference results are represented by multi-dimensional `cv::Mat` objects so accessing those is as easy as with a regular DNN module. Listing: Post-Processing Kernel ############################### + +G-API gives an easy way to plug custom code into the pipeline even if it is running in a streaming mode and processing tensor data. Inference results are represented by multi-dimensional ``cv::Mat`` objects so accessing those is as easy as with a regular DNN module. The OpenCV-based SSD post-processing kernel is defined and implemented in this sample as follows: -```cpp -// SSD Post-processing function - this is not a network but a kernel. -// The kernel body is declared separately, this is just an interface. -// This operation takes two Mats (detections and the source image), -// and returns a vector of ROI (filtered by a default threshold). -// Threshold (or a class to select) may become a parameter, but since -// this kernel is custom, it doesn't make a lot of sense. -G_API_OP(PostProc, (cv::GMat, cv::GMat)>, "custom.fd_postproc") { - static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) { - // This function is required for G-API engine to figure out - // what the output format is, given the input parameters. - // Since the output is an array (with a specific type), - // there's nothing to describe. - return cv::empty_array_desc(); - } -}; -// OpenCV-based implementation of the above kernel. 
-GAPI_OCV_KERNEL(OCVPostProc, PostProc) { - static void run(const cv::Mat &in_ssd_result, - const cv::Mat &in_frame, - std::vector &out_faces) { - const int MAX_PROPOSALS = 200; - const int OBJECT_SIZE = 7; - const cv::Size upscale = in_frame.size(); - const cv::Rect surface({0,0}, upscale); - out_faces.clear(); - const float *data = in_ssd_result.ptr(); - for (int i = 0; i < MAX_PROPOSALS; i++) { - const float image_id = data[i * OBJECT_SIZE + 0]; // batch id - const float confidence = data[i * OBJECT_SIZE + 2]; - const float rc_left = data[i * OBJECT_SIZE + 3]; - const float rc_top = data[i * OBJECT_SIZE + 4]; - const float rc_right = data[i * OBJECT_SIZE + 5]; - const float rc_bottom = data[i * OBJECT_SIZE + 6]; - if (image_id < 0.f) { // indicates end of detections - break; - } - if (confidence < 0.5f) { // a hard-coded snapshot - continue; - } - // Convert floating-point coordinates to the absolute image - // frame coordinates; clip by the source image boundaries. - cv::Rect rc; - rc.x = static_cast(rc_left * upscale.width); - rc.y = static_cast(rc_top * upscale.height); - rc.width = static_cast(rc_right * upscale.width) - rc.x; - rc.height = static_cast(rc_bottom * upscale.height) - rc.y; - out_faces.push_back(rc & surface); - } - } -}; -``` \ No newline at end of file + +.. code-block:: cpp + + // SSD Post-processing function - this is not a network but a kernel. + // The kernel body is declared separately, this is just an interface. + // This operation takes two Mats (detections and the source image), + // and returns a vector of ROI (filtered by a default threshold). + // Threshold (or a class to select) may become a parameter, but since + // this kernel is custom, it doesn't make a lot of sense. + G_API_OP(PostProc, (cv::GMat, cv::GMat)>, "custom.fd_postproc") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) { + // This function is required for G-API engine to figure out + // what the output format is, given the input parameters. + // Since the output is an array (with a specific type), + // there's nothing to describe. + return cv::empty_array_desc(); + } + }; + // OpenCV-based implementation of the above kernel. + GAPI_OCV_KERNEL(OCVPostProc, PostProc) { + static void run(const cv::Mat &in_ssd_result, + const cv::Mat &in_frame, + std::vector &out_faces) { + const int MAX_PROPOSALS = 200; + const int OBJECT_SIZE = 7; + const cv::Size upscale = in_frame.size(); + const cv::Rect surface({0,0}, upscale); + out_faces.clear(); + const float \*data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i \* OBJECT_SIZE + 0]; // batch id + const float confidence = data[i \* OBJECT_SIZE + 2]; + const float rc_left = data[i \* OBJECT_SIZE + 3]; + const float rc_top = data[i \* OBJECT_SIZE + 4]; + const float rc_right = data[i \* OBJECT_SIZE + 5]; + const float rc_bottom = data[i \* OBJECT_SIZE + 6]; + if (image_id < 0.f) { // indicates end of detections + break; + } + if (confidence < 0.5f) { // a hard-coded snapshot + continue; + } + // Convert floating-point coordinates to the absolute image + // frame coordinates; clip by the source image boundaries. 
+ cv::Rect rc; + rc.x = static_cast(rc_left \* upscale.width); + rc.y = static_cast(rc_top \* upscale.height); + rc.width = static_cast(rc_right \* upscale.width) - rc.x; + rc.height = static_cast(rc_bottom \* upscale.height) - rc.y; + out_faces.push_back(rc & surface); + } + } + }; + +@endsphinxdirective + diff --git a/docs/gapi/gapi_intro.md b/docs/gapi/gapi_intro.md index c6568389987293..c24b6cc9dd5c4a 100644 --- a/docs/gapi/gapi_intro.md +++ b/docs/gapi/gapi_intro.md @@ -10,56 +10,64 @@ openvino_docs_gapi_face_beautification openvino_docs_gapi_gapi_face_analytics_pipeline -@endsphinxdirective - OpenCV Graph API (G-API) is an OpenCV module targeted to make regular image and video processing fast and portable. G-API is a special module in OpenCV – in contrast with the majority of other main modules, this one acts as a framework rather than some specific CV algorithm. G-API is positioned as a next level optimization enabler for computer vision, focusing not on particular CV functions but on the whole algorithm optimization. G-API provides means to define CV operations, construct graphs (in form of expressions) using it, and finally implement and run the operations for a particular backend. -The idea behind G-API is that if an algorithm can be expressed in a special embedded language (currently in C++), the framework can catch its sense and apply a number of optimizations to the whole thing automatically. Particular optimizations are selected based on which [kernels](kernel_api.md) and [backends](https://docs.opencv.org/4.5.0/dc/d1c/group__gapi__std__backends.html) are involved in the graph compilation process, for example, the graph can be offloaded to GPU via the OpenCL backend, or optimized for memory consumption with the Fluid backend. Kernels, backends, and their settings are parameters to the graph compilation, so the graph itself does not depend on any platform-specific details and can be ported easily. +The idea behind G-API is that if an algorithm can be expressed in a special embedded language (currently in C++), the framework can catch its sense and apply a number of optimizations to the whole thing automatically. Particular optimizations are selected based on which :doc:`kernels ` and `backends `__ are involved in the graph compilation process, for example, the graph can be offloaded to GPU via the OpenCL backend, or optimized for memory consumption with the Fluid backend. Kernels, backends, and their settings are parameters to the graph compilation, so the graph itself does not depend on any platform-specific details and can be ported easily. -> **NOTE**: Graph API (G-API) was introduced in the most recent major OpenCV 4.0 release and now is being actively developed. The API is volatile at the moment and there may be minor but compatibility-breaking changes in the future. +.. note:: + Graph API (G-API) was introduced in the most recent major OpenCV 4.0 release and now is being actively developed. The API is volatile at the moment and there may be minor but compatibility-breaking changes in the future. -## G-API Concepts +G-API Concepts +############## * *Graphs* are built by applying operations to data objects. - * API itself has no "graphs", it is expression-based instead. + + * API itself has no "graphs", it is expression-based instead. + * *Data objects* do not hold actual data, only capture dependencies. * *Operations* consume and produce data objects. * A graph is defined by specifying its boundaries with data objects: - * What data objects are inputs to the graph? 
- * What are its outputs? + + * What data objects are inputs to the graph? + * What are its outputs? The paragraphs below explain the G-API programming model and development workflow. -## Programming Model -Building graphs is easy with G-API. In fact, there is no notion of graphs exposed in the API, so the user doesn’t need to operate in terms of “nodes” and “edges” — instead, graphs are constructed implicitly via expressions in a "functional" way. Expression-based graphs are built using two major concepts: *[operations](kernel_api.md)* and *[data objects](https://docs.opencv.org/4.2.0/db/df1/group__gapi__data__objects.html)*. +Programming Model +################# + +Building graphs is easy with G-API. In fact, there is no notion of graphs exposed in the API, so the user doesn’t need to operate in terms of “nodes” and “edges” — instead, graphs are constructed implicitly via expressions in a "functional" way. Expression-based graphs are built using two major concepts: :doc:`operations ` and `data objects `__ . In G-API, every graph begins and ends with data objects; data objects are passed to operations which produce (“return”) their results — new data objects, which are then passed to other operations, and so on. You can declare their own operations, G-API does not distinguish user-defined operations from its own predefined ones in any way. -After the graph is defined, it needs to be compiled for execution. During the compilation, G-API figures out what the graph looks like, which kernels are available to run the operations in the graph, how to manage heterogeneity and to optimize the execution path. The result of graph compilation is a so-called “compiled” object. This object encapsulates the execution sequence for the graph inside and operates on real image data. You can set up the compilation process using various [compilation arguments](https://docs.opencv.org/4.5.0/dc/d1c/group__gapi__std__backends.html). Backends expose some of their options as these arguments; also, actual kernels and DL network settings are passed into the framework this way. +After the graph is defined, it needs to be compiled for execution. During the compilation, G-API figures out what the graph looks like, which kernels are available to run the operations in the graph, how to manage heterogeneity and to optimize the execution path. The result of graph compilation is a so-called “compiled” object. This object encapsulates the execution sequence for the graph inside and operates on real image data. You can set up the compilation process using various `compilation arguments `__. Backends expose some of their options as these arguments; also, actual kernels and DL network settings are passed into the framework this way. G-API supports graph compilation for two execution modes, *regular* and *streaming*, producing different types of compiled objects as the result. -* Regular compiled objects are represented with class GCompiled, which follows functor-like semantics and has an overloaded operator(). When called for execution on the given input data, the GCompiled functor blocks the current thread and processes the data immediately — like a regular C++ function. By default, G-API tries to optimize the execution time for latency in this compilation mode. + +* **Regular** compiled objects are represented with class GCompiled, which follows functor-like semantics and has an overloaded operator(). 
When called for execution on the given input data, the GCompiled functor blocks the current thread and processes the data immediately — like a regular C++ function. By default, G-API tries to optimize the execution time for latency in this compilation mode. * Starting with OpenCV 4.2, G-API can also produce GStreamingCompiled objects that better fit the asynchronous pipelined execution model. This compilation mode is called **streaming mode**, and G-API tries to optimize the overall throughput by implementing the pipelining technique as described above. We will use both in our example. The overall process for the regular case is summarized in the diagram below: -![G-API Programming Model](../img/gapi_programming_model.png) +.. image:: _static/images/gapi_programming_model.png -The graph is built with operations so having operations defined (**0**) is a basic prerequisite; a constructed expression graph (**1**) forms a `cv::GComputation` object; kernels (**2**) which implement operations are the basic requirement to the graph compilation (**3**); the actual execution (**4**) is handled by a `cv::GCompiled` object with takes input and produces output data. +The graph is built with operations so having operations defined (**0**) is a basic prerequisite; a constructed expression graph (**1**) forms a ``cv::GComputation`` object; kernels (**2**) which implement operations are the basic requirement for the graph compilation (**3**); the actual execution (**4**) is handled by a ``cv::GCompiled`` object which takes input and produces output data. + +Development Workflow +#################### -## Development Workflow One of the ways to organize a G-API development workflow is presented in the diagram below: -![G-API development workflow](../img/gapi_development_workflow.png) +.. image:: _static/images/gapi_development_workflow.png Basically, it is a derivative from the programming model illustrated in the previous chapter. You start with an algorithm or a data flow in mind (**0**), mapping it to a graph model (**1**), then identifying what operations you need (**2**) to construct this graph. These operations may already exist in G-API or be missing, in the latter case we implement the missing ones as kernels (**3**). Then decide which execution model fits our case better, pass kernels and DL networks as arguments to the compilation process (**4**), and finally switch to the execution (**5**). The process is iterative, so if you want to change anything based on the execution results, get back to steps (**0**) or (**1**) (a dashed line). - +@endsphinxdirective diff --git a/docs/gapi/kernel_api.md b/docs/gapi/kernel_api.md index f004ea9e89d922..df4e183f5248b1 100644 --- a/docs/gapi/kernel_api.md +++ b/docs/gapi/kernel_api.md @@ -1,188 +1,212 @@ # Graph API Kernel API {#openvino_docs_gapi_kernel_api} +@sphinxdirective + The core idea behind Graph API (G-API) is portability – a pipeline built with G-API must be portable (or at least able to be portable). It means that either it works out-of-the box when compiled for new platform, or G-API provides necessary tools to make it running there, with little-to-no changes in the algorithm itself. This idea can be achieved by separating kernel interface from its implementation. Once a pipeline is built using kernel interfaces, it becomes implementation-neutral – the implementation details (i.e. which kernels to use) are passed on a separate stage (graph compilation). 
Kernel-implementation hierarchy may look like: -![Kernel API/implementation hierarchy example](../img/gapi_kernel_implementation_hierarchy.png) - -A pipeline itself then can be expressed only in terms of `A`, `B`, and so on, and choosing which implementation to use in execution becomes an external parameter. - -## Define a Kernel -G-API provides a macro to define a new kernel interface `G_TYPED_KERNEL()`: - -```cpp -#include -G_TYPED_KERNEL(GFilter2D, - , - "org.opencv.imgproc.filters.filter2D") -{ - static cv::GMatDesc // outMeta's return value type - outMeta(cv::GMatDesc in , // descriptor of input GMat - int ddepth , // depth parameter - cv::Mat /* coeffs */, // (unused) - cv::Point /* anchor */, // (unused) - double /* scale */, // (unused) - int /* border */, // (unused) - cv::Scalar /* bvalue */ ) // (unused) - { - return in.withDepth(ddepth); - } -}; -``` + +.. image:: _static/images/gapi_kernel_implementation_hierarchy.png + +A pipeline itself then can be expressed only in terms of ``A``, ``B``, and so on, and choosing which implementation to use in execution becomes an external parameter. + +Define a Kernel +############### + +G-API provides a macro to define a new kernel interface ``G_TYPED_KERNEL()``: + +.. code-block:: cpp + + #include + G_TYPED_KERNEL(GFilter2D, + , + "org.opencv.imgproc.filters.filter2D") + { + static cv::GMatDesc // outMeta's return value type + outMeta(cv::GMatDesc in , // descriptor of input GMat + int ddepth , // depth parameter + cv::Mat /\* coeffs \*/, // (unused) + cv::Point /\* anchor \*/, // (unused) + double /\* scale \*/, // (unused) + int /\* border \*/, // (unused) + cv::Scalar /\* bvalue \*/ ) // (unused) + { + return in.withDepth(ddepth); + } + }; + This macro is a shortcut to a new type definition. It takes three arguments to register a new type, and requires type body to be present (see below). The macro arguments are: * Kernel interface name -- Also serves as a name of new type defined with this macro; -* Kernel signature -- An `std::function<>`-like signature which defines API of the kernel; +* Kernel signature -- An ``std::function<>``-like signature which defines API of the kernel; * Kernel's unique name -- Used to identify kernel when its type information is stripped within the system. * Kernel declaration may be seen as function declaration -- In both cases a new entity must be used then according to the way it was defined. -Kernel signature defines kernel's usage syntax -- which parameters it takes during graph construction. Implementations can also use this signature to derive it into backend-specific callback signatures (see next chapter). +Kernel signature defines kernel's usage syntax which parameters it takes during graph construction. Implementations can also use this signature to derive it into backend-specific callback signatures (see next chapter). + +Kernel may accept values of any type, and G-API dynamic types are handled in a special way. All other types are opaque to G-API and passed to kernel in ``outMeta()`` or in execution callbacks as-is. + +Kernel's return value can only be of G-API dynamic type – ``cv::GMat``, ``cv::GScalar``, or ``cv::GArray``. If an operation has more than one output, it should be wrapped into an ``std::tuple<>`` (which can contain only mentioned G-API types). Arbitrary-output-number operations are not supported. -Kernel may accept values of any type, and G-API dynamic types are handled in a special way. 
All other types are opaque to G-API and passed to kernel in `outMeta()` or in execution callbacks as-is. +Once a kernel is defined, it can be used in pipelines with special, G-API-supplied method ``on()``. This method has the same signature as defined in kernel, so the following code is a perfectly legal construction: -Kernel's return value can only be of G-API dynamic type – `cv::GMat`, `cv::GScalar`, or `cv::GArray`. If an operation has more than one output, it should be wrapped into an `std::tuple<>` (which can contain only mentioned G-API types). Arbitrary-output-number operations are not supported. +.. code-block:: cpp + + cv::GMat in; + cv::GMat out = GFilter2D::on(/\* GMat \*/ in, + /\* int \*/ -1, + /\* Mat \*/ conv_kernel_mat, + /\* Point \*/ cv::Point(-1,-1), + /\* double \*/ 0., + /\* int \*/ cv::BORDER_DEFAULT, + /\* Scalar \*/ cv::Scalar(0)); -Once a kernel is defined, it can be used in pipelines with special, G-API-supplied method `on()`. This method has the same signature as defined in kernel, so the following code is a perfectly legal construction: -```cpp -cv::GMat in; -cv::GMat out = GFilter2D::on(/* GMat */ in, - /* int */ -1, - /* Mat */ conv_kernel_mat, - /* Point */ cv::Point(-1,-1), - /* double */ 0., - /* int */ cv::BORDER_DEFAULT, - /* Scalar */ cv::Scalar(0)); -``` This example has some verbosity, though, so usually a kernel declaration comes with a C++ function wrapper ("factory method") which enables optional parameters, more compact syntax, Doxygen comments, etc.: -```cpp -cv::GMat filter2D(cv::GMat in, - int ddepth, - cv::Mat k, - cv::Point anchor = cv::Point(-1,-1), - double scale = 0., - int border = cv::BORDER_DEFAULT, - cv::Scalar bval = cv::Scalar(0)) -{ - return GFilter2D::on(in, ddepth, k, anchor, scale, border, bval); -} -``` +.. code-block:: cpp + + cv::GMat filter2D(cv::GMat in, + int ddepth, + cv::Mat k, + cv::Point anchor = cv::Point(-1,-1), + double scale = 0., + int border = cv::BORDER_DEFAULT, + cv::Scalar bval = cv::Scalar(0)) + { + return GFilter2D::on(in, ddepth, k, anchor, scale, border, bval); + } + + So now it can be used like: -```cpp -cv::GMat in; -cv::GMat out = filter2D(in, -1, conv_kernel_mat); -``` -### Extra information -In the current version, kernel declaration body (everything within the curly braces) must contain a static function `outMeta()`. This function establishes a functional dependency between operation's input and output metadata. +.. code-block:: cpp + + cv::GMat in; + cv::GMat out = filter2D(in, -1, conv_kernel_mat); + + +Extra information ++++++++++++++++++ + +In the current version, kernel declaration body (everything within the curly braces) must contain a static function ``outMeta()``. This function establishes a functional dependency between operation's input and output metadata. -Metadata is an information about data kernel operates on. Since non-G-API types are opaque to G-API, G-API cares only about G* data descriptors (i.e. dimensions and format of `cv::GMat`, etc). +Metadata is an information about data kernel operates on. Since non-G-API types are opaque to G-API, G-API cares only about G* data descriptors (i.e. dimensions and format of ``cv::GMat``, etc). -`outMeta()` is also an example of how kernel's signature can be transformed into a derived callback – note that in this example, outMeta() signature exactly follows the kernel signature (defined within the macro) but is different – where kernel expects `cv::GMat`, `outMeta()` takes and returns `cv::GMatDesc` (a G-API structure metadata for `cv::GMat`). 
+The ``outMeta()`` is also an example of how kernel's signature can be transformed into a derived callback – note that in this example, outMeta() signature exactly follows the kernel signature (defined within the macro) but is different – where kernel expects ``cv::GMat``, ``outMeta()`` takes and returns ``cv::GMatDesc`` (a G-API structure metadata for ``cv::GMat``). -The point of `outMeta()` is to propagate metadata information within computation from inputs to outputs and infer metadata of internal (intermediate, temporary) data objects. This information is required for further pipeline optimizations, memory allocation, and other operations done by G-API framework during graph compilation. +The point of ``outMeta()`` is to propagate metadata information within computation from inputs to outputs and infer metadata of internal (intermediate, temporary) data objects. This information is required for further pipeline optimizations, memory allocation, and other operations done by G-API framework during graph compilation. + +Implement a Kernel +################## -## Implement a Kernel Once a kernel is declared, its interface can be used to implement versions of this kernel in different backends. This concept is naturally projected from object-oriented programming "Interface/Implementation" idiom: an interface can be implemented multiple times, and different implementations of a kernel should be substitutable with each other without breaking the algorithm (pipeline) logic (Liskov Substitution Principle). Every backend defines its own way to implement a kernel interface. This way is regular, though – whatever plugin is, its kernel implementation must be "derived" from a kernel interface type. -Kernel implementation are then organized into kernel packages. Kernel packages are passed to `cv::GComputation::compile()` as compile arguments, with some hints to G-API on how to select proper kernels (see more on this in "Heterogeneity"[TBD]). +Kernel implementation are then organized into kernel packages. Kernel packages are passed to ``cv::GComputation::compile()`` as compile arguments, with some hints to G-API on how to select proper kernels. For example, the aforementioned Filter2D is implemented in "reference" CPU (OpenCV) plugin this way (NOTE – this is a simplified form with improper border handling): -```cpp -#include // GAPI_OCV_KERNEL() -#include // cv::filter2D() -GAPI_OCV_KERNEL(GCPUFilter2D, GFilter2D) -{ - static void - run(const cv::Mat &in, // in - derived from GMat - const int ddepth, // opaque (passed as-is) - const cv::Mat &k, // opaque (passed as-is) - const cv::Point &anchor, // opaque (passed as-is) - const double delta, // opaque (passed as-is) - const int border, // opaque (passed as-is) - const cv::Scalar &, // opaque (passed as-is) - cv::Mat &out) // out - derived from GMat (retval) - { - cv::filter2D(in, out, ddepth, k, anchor, delta, border); - } -}; -``` +.. 
code-block:: cpp + + #include // GAPI_OCV_KERNEL() + #include // cv::filter2D() + GAPI_OCV_KERNEL(GCPUFilter2D, GFilter2D) + { + static void + run(const cv::Mat &in, // in - derived from GMat + const int ddepth, // opaque (passed as-is) + const cv::Mat &k, // opaque (passed as-is) + const cv::Point &anchor, // opaque (passed as-is) + const double delta, // opaque (passed as-is) + const int border, // opaque (passed as-is) + const cv::Scalar &, // opaque (passed as-is) + cv::Mat &out) // out - derived from GMat (retval) + { + cv::filter2D(in, out, ddepth, k, anchor, delta, border); + } + }; + + Note how CPU (OpenCV) plugin has transformed the original kernel signature: -* Input `cv::GMat` has been substituted with `cv::Mat`, holding actual input data for the underlying OpenCV function call; -* Output `cv::GMat `has been transformed into extra output parameter, thus `GCPUFilter2D::run()` takes one argument more than the original kernel signature. - -The basic intuition for kernel developer here is not to care where that cv::Mat objects come from instead of the original `cv::GMat` – and just follow the signature conventions defined by the plugin. G-API will call this method during execution and supply all the necessary information (and forward the original opaque data as-is). - -## Compound Kernels -Sometimes kernel is a single thing only on API level. It is convenient for users, but on a particular implementation side it would be better to have multiple kernels (a subgraph) doing the thing instead. An example is `goodFeaturesToTrack()` – while in OpenCV backend it may remain a single kernel, with Fluid it becomes compound – Fluid can handle Harris response calculation but can't do sparse non-maxima suppression and point extraction to an STL vector: - -A compound kernel implementation can be defined using a generic macro `GAPI_COMPOUND_KERNEL()`: - -```cpp -#include // GAPI_COMPOUND_KERNEL() -using PointArray2f = cv::GArray; -G_TYPED_KERNEL(HarrisCorners, - , - "org.opencv.imgproc.harris_corner") -{ - static cv::GArrayDesc outMeta(const cv::GMatDesc &, - int, - double, - double, - int, - double) - { - // No special metadata for arrays in G-API (yet) - return cv::empty_array_desc(); - } -}; -// Define Fluid-backend-local kernels which form GoodFeatures -G_TYPED_KERNEL(HarrisResponse, - , - "org.opencv.fluid.harris_response") -{ - static cv::GMatDesc outMeta(const cv::GMatDesc &in, - double, - int, - double) - { - return in.withType(CV_32F, 1); - } -}; -G_TYPED_KERNEL(ArrayNMS, - , - "org.opencv.cpu.nms_array") -{ - static cv::GArrayDesc outMeta(const cv::GMatDesc &, - int, - double) - { - return cv::empty_array_desc(); - } -}; -GAPI_COMPOUND_KERNEL(GFluidHarrisCorners, HarrisCorners) -{ - static PointArray2f - expand(cv::GMat in, - int maxCorners, - double quality, - double minDist, - int blockSize, - double k) - { - cv::GMat response = HarrisResponse::on(in, quality, blockSize, k); - return ArrayNMS::on(response, maxCorners, minDist); - } -}; -// Then implement HarrisResponse as Fluid kernel and NMSresponse -// as a generic (OpenCV) kernel -``` -It is important to distinguish a compound kernel from G-API high-order function, i.e. a C++ function which looks like a kernel but in fact generates a subgraph. 
The core difference is that a compound kernel is an *implementation detail* and a kernel implementation may be either compound or not (depending on backend capabilities), while a high-order function is a "macro" in terms of G-API and so cannot act as an interface which then needs to be implemented by a backend. \ No newline at end of file +* Input ``cv::GMat`` has been substituted with ``cv::Mat``, holding actual input data for the underlying OpenCV function call; +* Output ``cv::GMat`` has been transformed into extra output parameter, thus ``GCPUFilter2D::run()`` takes one argument more than the original kernel signature. + +The basic intuition for kernel developer here is not to care where that cv::Mat objects come from instead of the original ``cv::GMat`` – and just follow the signature conventions defined by the plugin. G-API will call this method during execution and supply all the necessary information (and forward the original opaque data as-is). + +Compound Kernels +################ + +Sometimes kernel is a single thing only on API level. It is convenient for users, but on a particular implementation side it would be better to have multiple kernels (a subgraph) doing the thing instead. An example is ``goodFeaturesToTrack()`` – while in OpenCV backend it may remain a single kernel, with Fluid it becomes compound – Fluid can handle Harris response calculation but can't do sparse non-maxima suppression and point extraction to an STL vector: + +A compound kernel implementation can be defined using a generic macro ``GAPI_COMPOUND_KERNEL()``: + +.. code-block:: cpp + + #include // GAPI_COMPOUND_KERNEL() + using PointArray2f = cv::GArray; + G_TYPED_KERNEL(HarrisCorners, + , + "org.opencv.imgproc.harris_corner") + { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, + int, + double, + double, + int, + double) + { + // No special metadata for arrays in G-API (yet) + return cv::empty_array_desc(); + } + }; + // Define Fluid-backend-local kernels which form GoodFeatures + G_TYPED_KERNEL(HarrisResponse, + , + "org.opencv.fluid.harris_response") + { + static cv::GMatDesc outMeta(const cv::GMatDesc &in, + double, + int, + double) + { + return in.withType(CV_32F, 1); + } + }; + G_TYPED_KERNEL(ArrayNMS, + , + "org.opencv.cpu.nms_array") + { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, + int, + double) + { + return cv::empty_array_desc(); + } + }; + GAPI_COMPOUND_KERNEL(GFluidHarrisCorners, HarrisCorners) + { + static PointArray2f + expand(cv::GMat in, + int maxCorners, + double quality, + double minDist, + int blockSize, + double k) + { + cv::GMat response = HarrisResponse::on(in, quality, blockSize, k); + return ArrayNMS::on(response, maxCorners, minDist); + } + }; + // Then implement HarrisResponse as Fluid kernel and NMSresponse + // as a generic (OpenCV) kernel + +It is important to distinguish a compound kernel from G-API high-order function, i.e. a C++ function which looks like a kernel but in fact generates a subgraph. The core difference is that a compound kernel is an *implementation detail* and a kernel implementation may be either compound or not (depending on backend capabilities), while a high-order function is a "macro" in terms of G-API and so cannot act as an interface which then needs to be implemented by a backend. 
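+
+To make the distinction more concrete, a high-order function in this sense is plain C++ code that expands into a subgraph of existing operations at construction time. The helper below is a hypothetical sketch (its name and the particular operations are chosen only for illustration, assuming the standard G-API core and imgproc headers are included):
+
+.. code-block:: cpp
+
+   // Looks like a single operation to the caller, but simply emits a small
+   // subgraph of standard G-API operations when the expression is built.
+   // There is no kernel interface here, so a backend cannot re-implement it.
+   cv::GMat gradientMagnitude(const cv::GMat &in)
+   {
+       cv::GMat gx = cv::gapi::Sobel(in, CV_32F, 1, 0);
+       cv::GMat gy = cv::gapi::Sobel(in, CV_32F, 0, 1);
+       return cv::gapi::sqrt(cv::gapi::add(cv::gapi::mul(gx, gx),
+                                           cv::gapi::mul(gy, gy)));
+   }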
+ +@endsphinxdirective + diff --git a/docs/install_guides/installing-model-dev-tools.md b/docs/install_guides/installing-model-dev-tools.md index a2959f190b40e9..b701f6efafe5a7 100644 --- a/docs/install_guides/installing-model-dev-tools.md +++ b/docs/install_guides/installing-model-dev-tools.md @@ -165,6 +165,7 @@ Get started with Python Try the `Python Quick Start Example `__ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. Visit the :doc:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: + * `OpenVINO Python API Tutorial `__ * `Basic image classification program with Hello Image Classification `__ * `Convert a PyTorch model and use it for image background removal `__ @@ -186,16 +187,17 @@ Visit the :doc:`Samples ` page for other C Learn OpenVINO Development Tools ++++++++++++++++++++++++++++++++ -* Explore a variety of pre-trained deep learning models in the :ref:`Open Model Zoo ` and deploy them in demo applications to see how they work. -* Want to import a model from another framework and optimize its performance with OpenVINO? Visit the :ref:`Model Optimizer Developer Guide `. -* Accelerate your model's speed even further with quantization and other compression techniques using :ref:`Post-Training Optimization Tool `. -* Benchmark your model's inference speed with one simple command using the :ref:`Benchmark Tool . +* Explore a variety of pre-trained deep learning models in the :doc:`Open Model Zoo ` and deploy them in demo applications to see how they work. +* Want to import a model from another framework and optimize its performance with OpenVINO? Visit the :doc:`Model Optimizer Developer Guide `. +* Accelerate your model's speed even further with quantization and other compression techniques using :doc:`Post-Training Optimization Tool `. +* Benchmark your model's inference speed with one simple command using the :doc:`Benchmark Tool `. -## Additional Resources +Additional Resources +#################### - `Intel® Distribution of OpenVINO™ toolkit home page `__ - For IoT Libraries & Code Samples, see `Intel® IoT Developer Kit `__ . -- `OpenVINO Installation Selector Tool __ +- `OpenVINO Installation Selector Tool `__ @endsphinxdirective diff --git a/docs/install_guides/installing-openvino-brew.md b/docs/install_guides/installing-openvino-brew.md index 557e38872e84c2..1bbf98042015a2 100644 --- a/docs/install_guides/installing-openvino-brew.md +++ b/docs/install_guides/installing-openvino-brew.md @@ -2,58 +2,52 @@ @sphinxdirective -With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See :ref:`Installing Additional Components ` for more information. +With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See `Installing Additional Components <#optional-installing-additional-components>`__ for more information. -See the `Release Notes `_ for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. Installing OpenVINO Runtime from Homebrew is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. 
Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -.. note:: +.. note:: Only CPU is supported for inference if you install OpenVINO via HomeBrew. -.. warning:: +.. warning:: - By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `_. + By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `__. -@endsphinxdirective - -## Prerequisites - -### System Requirements -@sphinxdirective - -Full requirement listing is available on the `System Requirements Page `_ +Prerequisites +#################### -@endsphinxdirective +System Requirements +++++++++++++++++++++ -### Software Requirements +Full requirement listing is available on the `System Requirements Page `__ -@sphinxdirective +Software Requirements ++++++++++++++++++++++ .. tab:: macOS * `Homebrew `_ - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default installation). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add it to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory to install it. + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add it to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) .. tab:: Linux * `Homebrew `_ - * `CMake 3.13 or higher, 64-bit `_ + * `CMake 3.13 or higher, 64-bit `__ * GCC 7.5.0 (for Ubuntu 18.04) or GCC 9.3.0 (for Ubuntu 20.04) - * `Python 3.7 - 3.10, 64-bit `_ - -@endsphinxdirective + * `Python 3.7 - 3.10, 64-bit `__ -## Installing OpenVINO Runtime -@sphinxdirective +Installing OpenVINO Runtime +########################### -1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `_ to install and configure it. +1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `__ to install and configure it. 2. Open a command prompt terminal window, and run the following command to install OpenVINO Runtime: @@ -61,15 +55,11 @@ Full requirement listing is available on the `System Requirements Page `_. -@endsphinxdirective - -## Uninstalling OpenVINO +Uninstalling OpenVINO +##################### To uninstall OpenVINO via HomeBrew, use the following command: -```sh -brew uninstall openvino -``` -## What's Next? +.. code-block:: sh -@sphinxdirective + brew uninstall openvino + + +What's Next? +#################### -Now that you've installed OpenVINO Runtime, you can try the following things: +Now that you've installed OpenVINO Runtime, you can try the following things: * Learn more about :doc:`OpenVINO Workflow `. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation `. @@ -99,8 +89,9 @@ Now that you've installed OpenVINO Runtime, you can try the following things: * See sample applications in :doc:`OpenVINO toolkit Samples Overview `. * Take a glance at the OpenVINO product home page: https://software.intel.com/en-us/openvino-toolkit. 
-@endsphinxdirective +Additional Resources +#################### -## Additional Resources +* `OpenVINO Installation Selector Tool `__ -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index b42034cb7bc9b2..d10bf08df67a2e 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -1,97 +1,124 @@ # Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows} +@sphinxdirective + This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices. -## System Requirements +.. _system-requirements-docker-windows: + +System Requirements +#################### + -@sphinxdirective .. tab:: Target Operating System with Python Versions - +------------------------------------+--------------------------+ - | Operating System | Supported Python Version | - +====================================+==========================+ - | Windows Server Core base LTSC 2019 | 3.8 | - +------------------------------------+--------------------------+ - | Windows 10, version 20H2 | 3.8 | - +------------------------------------+--------------------------+ + .. list-table:: + :header-rows: 1 + + * - Operating System + - Supported Python Version + * - Windows Server Core base LTSC 2019 + - 3.8 + * - Windows 10, version 20H2 + - 3.8 .. tab:: Host Operating Systems - * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions - * Windows Server 2016 or higher + * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions + * Windows Server 2016 or higher -@endsphinxdirective -### Additional Requirements for GPU +Additional Requirements for GPU ++++++++++++++++++++++++++++++++ To use GPU Acceleration in Windows containers, make sure that the following requirements for Windows host, OpenVINO and Docker are met: -- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): +- `Windows requirements `__: + - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. - - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. + - The container base image must be ``mcr.microsoft.com/windows:1809`` or higher. Windows Server Core and Nano Server container images are not currently supported. - The container host must be running Docker Engine 19.03 or higher. - The container host must have GPU running display drivers of version WDDM 2.5 or higher. + - GPU requirement for OpenVINO: Intel Graphics Driver for Windows of version 15.65 or higher. -- [Docker isolation mode requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): +- `Docker isolation mode requirements `__: + - Windows host and container version tags must match. 
- - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). + - `Windows host and container isolation process support `__. -## Installation Flow +Installation Flow +#################### There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs: + * Use a prebuilt image. Do the following steps: - 1. Get a prebuilt image from provided sources. - 2. Run the image on different devices. + + 1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__. + 2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. + * If you want to customize your image, you can also build a Docker image manually by using the following steps: - 1. Prepare a Dockerfile. - 2. Configure the Docker image. - 3. Run the image on different devices. -## Getting a Prebuilt Image from Provided Sources + 1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__. + 2. `Configure the Docker image <#configuring-the-docker-image-for-different-devices>`__. + 3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. + +Getting a Prebuilt Image from Provided Sources +############################################## You can find prebuilt images on: -- [Docker Hub](https://hub.docker.com/u/openvino) -- [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino) +- `Docker Hub `__ +- `Azure Marketplace `__ -## Preparing a Dockerfile +Preparing a Dockerfile +###################### -You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You can use the `available Dockerfiles on GitHub `__ or generate a Dockerfile with your settings via `DockerHub CI Framework `__ which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. -## Configuring the Docker Image for Different Devices +Configuring the Docker Image for Different Devices +################################################## -### Installing Additional Dependencies for CPU +Installing Additional Dependencies for CPU +++++++++++++++++++++++++++++++++++++++++++ -#### Installing CMake +Installing CMake +---------------- + +To add CMake to the image, add the following commands to the Dockerfile: + +.. code-block:: bat - To add CMake to the image, add the following commands to the Dockerfile: - ```bat RUN powershell.exe -Command ` Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; ` Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; ` Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%" - ``` - In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a Docker image: - ```bat + +In case of proxy issues, please add the ``ARG HTTPS_PROXY`` and ``-Proxy %%HTTPS_PROXY%`` settings to the ``powershell.exe`` command to the Dockerfile. Then build a Docker image: + +.. 
code-block:: bat + docker build . -t ` --build-arg HTTPS_PROXY= - ``` - -#### Installing Microsoft Visual Studio Build Tools - You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools. - - Microsoft Visual Studio Build Tools are licensed as a supplement your existing Microsoft Visual Studio license. - - Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. - To add MSBuild 2019 to the image, add the following commands to the Dockerfile: - ```bat - RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe +Installing Microsoft Visual Studio Build Tools +---------------------------------------------- + +You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the `offline `__ or `online `__ installers for Build Tools. + +Microsoft Visual Studio Build Tools are licensed as a supplement your existing Microsoft Visual Studio license. + +Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. + +To add MSBuild 2019 to the image, add the following commands to the Dockerfile: +.. code-block:: bat + + RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe + RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` --add Microsoft.VisualStudio.Workload.MSBuildTools ` @@ -101,75 +128,110 @@ You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoo --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned - ``` - In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). -### Configuring the Image for GPU +In case of proxy issues, please use the `offline installer for Build Tools `__. + +Configuring the Image for GPU ++++++++++++++++++++++++++++++ + +.. note:: + + Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles `__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and do the following steps to build the image manually. + +1. Reuse one of `available Dockerfiles `__. You can also use your own Dockerfile. +2. Check your `Windows host and container isolation process compatibility `__. +3. Find the appropriate Windows container base image on `DockerHub `__ and set up your host/container version in the ``FROM`` Dockerfile instruction. + + For example, in the ``openvino_c_dev_.dockerfile``, change: + + .. 
code-block:: bat -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and do the following steps to build the image manually. + FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base -1. Reuse one of [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles). You can also use your own Dockerfile. -2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). -3. Find the appropriate Windows container base image on [DockerHub](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. - For example, in the `openvino_c_dev_.dockerfile`, change: - ```bat - FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base - ``` to: - ```bat - FROM mcr.microsoft.com/windows:20H2 - ``` + + .. code-block:: bat + + FROM mcr.microsoft.com/windows:20H2 + + 4. Build the Docker image by running the following command: - ```bat - docker build --build-arg package_url= -f -t . - ``` -5. Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory: - ```bat - mkdir C:\tmp - copy C:\Windows\System32\OpenCL.dll C:\tmp - ``` -## Running the Docker Image on Different Devices + .. code-block:: bat + + docker build --build-arg package_url= -f -t . + + +5. Copy ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder to any ``temp`` directory: + + .. code-block:: bat + + mkdir C:\tmp + copy C:\Windows\System32\OpenCL.dll C:\tmp -### Running the Image on CPU + +Running the Docker Image on Different Devices +############################################# + +Running the Image on CPU +++++++++++++++++++++++++ To start the interactive session, run the following command: -```bat -docker run -it --rm -``` + +.. code-block:: bat + + docker run -it --rm + If you want to try some samples, run the image with the following command: -```bat -docker run -it --rm -cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" -``` -### Running the Image on GPU +.. code-block:: bat + + docker run -it --rm + cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" + + +Running the Image on GPU +++++++++++++++++++++++++ + +.. note:: -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and configure and build the image manually before you can run inferences on a GPU. 
+ Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles `__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and `configure and build the image manually <#configuring-the-image-for-gpu>`__ before you can run inferences on a GPU. 1. To try inference on a GPU, run the image with the following command: - ```bat - docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp - ``` + + .. code-block:: bat + + docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch. inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp + + where - - `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device. - - `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression. - - `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder. -2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device: - ```bat - copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 - ``` - For example, run the `Hello Classification Python` sample with the following command: - ```bat - omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU - ``` - - -## Additional Resources - -- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file + + - ``--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599`` is a reserved interface class GUID for a GPU device. + - ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409`` is the path to OpenCL driver home directory. To find it on your PC, run the ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*`` regular expression. 
+ - ``C:\tmp`` is the folder with the copy of ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder. + +2. Copy ``OpenCL.dll`` to the ``C:\Windows\System32`` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device: + + .. code-block:: bat + + copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch. inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 + + + For example, run the ``Hello Classification Python`` sample with the following command: + + .. code-block:: bat + + omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/ car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU + + +Additional Resources +#################### + +- `DockerHub CI Framework `__ for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. +- Intel® Distribution of OpenVINO™ toolkit home page: `https://software.intel.com/en-us/openvino-toolkit `__ +- `OpenVINO Installation Selector Tool `__ + +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-from-archive-macos.md b/docs/install_guides/installing-openvino-from-archive-macos.md index 23d20d6e666130..2e3793bce92e3f 100644 --- a/docs/install_guides/installing-openvino-from-archive-macos.md +++ b/docs/install_guides/installing-openvino-from-archive-macos.md @@ -1,53 +1,59 @@ # Install OpenVINO™ Runtime on macOS from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_macos} +@sphinxdirective + With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +.. 
note:: + + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org `__ only. -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: - | `System Requirements Page `_ + | `System Requirements Page `__ .. tab:: Software Requirements - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default install). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) -@endsphinxdirective -## Installing OpenVINO Runtime +Installing OpenVINO Runtime +########################### -### Step 1: Install OpenVINO Core Components +Step 1: Install OpenVINO Core Components +++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective -1. Open a command prompt terminal window. -2. Create the `/opt/intel` folder for OpenVINO by using the following command. If the folder already exists, skip this command. +1. Open a command prompt terminal window. +2. Create the ``/opt/intel`` folder for OpenVINO by using the following command. If the folder already exists, skip this command. .. code-block:: sh sudo mkdir /opt/intel - - .. note:: - - The `/opt/intel` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. -3. Browse to the current user's `Downloads` folder: + + .. note:: + + The ``/opt/intel`` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. + + +3. Browse to the current user's ``Downloads`` folder: .. code-block:: sh cd /Downloads - -4. Download the `OpenVINO Runtime archive file for macOS `_, extract the files, rename the extracted folder and move it to the desired path: + + +4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab:: x86, 64-bit @@ -65,55 +71,62 @@ See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNo tar -xf openvino_2022.3.0.tgz sudo mv m_openvino_toolkit_macos_11_0_2022.3.0.9052.9752fafe8eb_arm64 /opt/intel/openvino_2022.3.0 + 5. For simplicity, it is useful to create a symbolic link as below: .. code-block:: sh sudo ln -s openvino_2022.3.0 openvino_2022 - - .. note:: - - If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. Unlink the previous link with `sudo unlink openvino_2022`, and then re-run the command above. -@endsphinxdirective + .. note:: + + If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2022``, and then re-run the command above. + -Congratulations, you finished the installation! The `/opt/intel/openvino_2022` folder now contains the core components for OpenVINO. 
If you used a different path in Step 2, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. +Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 2, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred as ```` throughout the OpenVINO documentation. -### Step 2: Configure the Environment +Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++ -You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the `setupvars.sh` script as shown below to temporarily set your environment variables. If your is not `/opt/intel/openvino_2022`, use the correct one instead. +You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your ```` is not ``/opt/intel/openvino_2022``, use the correct one instead. -```sh -source /opt/intel/openvino_2022/setupvars.sh -``` +.. code-block:: sh -If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the `setupvars.sh` of your choice. + source /opt/intel/openvino_2022/setupvars.sh -> **NOTE**: The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open `~/.zshrc` in your favorite editor and add `source /opt/intel/openvino_2022/setupvars.sh` after the last line. Next time when you open a terminal, you will see `[setupvars.sh] OpenVINO™ environment initialized`. Changing `~/.zshrc` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. + +If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the ``setupvars.sh`` of your choice. + +.. note:: + + The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open ``~/.zshrc`` in your favorite editor and add ``source /opt/intel/openvino_2022/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``~/.zshrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. The environment variables are set. Continue to the next section if you want to download any additional components. -### Step 3 (Optional): Install Additional Components +Step 3 (Optional): Install Additional Components +++++++++++++++++++++++++++++++++++++++++++++++++ OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately. -See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions. +See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. 
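+
+Before installing the optional components, it can be helpful to confirm that the runtime set up in Steps 1 and 2 is visible to Python. The snippet below is a minimal sanity check, not part of the official instructions; it assumes you have sourced ``setupvars.sh`` in the current terminal and that the archive's Python bindings match your interpreter version.
+
+.. code-block:: python
+
+   # Minimal sanity check for the OpenVINO Runtime installed above.
+   # Assumes setupvars.sh has been sourced in this terminal session.
+   from openvino.runtime import Core, get_version
+
+   print("OpenVINO Runtime version:", get_version())
+
+   core = Core()
+   # Lists the devices OpenVINO can use on this machine, for example ['CPU'].
+   print("Available devices:", core.available_devices)
+
+If the import fails, re-run the ``setupvars.sh`` script from your installation folder before continuing.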
+ +OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub `__. -OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO). +What's Next? +#################### -## What's Next? Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - + * `OpenVINO Python API Tutorial `_ * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ @@ -121,44 +134,38 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling Intel® Distribution of OpenVINO™ Toolkit +Uninstalling Intel® Distribution of OpenVINO™ Toolkit +##################################################### -To uninstall the toolkit, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -## Additional Resources - -@sphinxdirective +Additional Resources +#################### +* `OpenVINO Installation Selector Tool `__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ -.. 
_Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-from-archive-windows.md b/docs/install_guides/installing-openvino-from-archive-windows.md index 2300a1dceb8423..a0bc2b7649c94e 100644 --- a/docs/install_guides/installing-openvino-from-archive-windows.md +++ b/docs/install_guides/installing-openvino-from-archive-windows.md @@ -1,112 +1,158 @@ # Install OpenVINO™ Runtime on Windows from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_windows} -With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. +@sphinxdirective -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +.. note:: -## System Requirements + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org `__ only. + + +See the `Release Notes `__ for more information on updates in the latest release. + +System Requirements +#################### -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: | `System Requirements Page `_ - + .. tab:: Processor Notes - Processor graphics are not included in all processors. - See `Product Specifications`_ for information about your processor. - - .. _Product Specifications: https://ark.intel.com/ + Processor graphics are not included in all processors. + See `Product Specifications`_ for information about your processor. + + .. _Product Specifications: https://ark.intel.com/ .. 
tab:: Software - * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ - * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) - * `Python 3.7 - 3.10, 64-bit `_ + * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ + * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) + * `Python 3.7 - 3.10, 64-bit `_ - .. note:: - To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. + .. note:: - .. note:: - You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. - - .. important:: - When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. -@endsphinxdirective + .. note:: -## Installing OpenVINO Runtime + You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. -### Step 1: Download and Install OpenVINO Core Components + .. important:: + + When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + + + +Installing OpenVINO Runtime +########################### + +.. _install-openvino-archive-windows: + +Step 1: Download and Install OpenVINO Core Components ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +1. Create an ``Intel`` folder in the ``C:\Program Files (x86)\`` directory. Skip this step if the folder already exists. -1. Create an `Intel` folder in the `C:\Program Files (x86)\` directory. Skip this step if the folder already exists. - You can also do this via command-lines. Open a new command prompt window as administrator by right-clicking **Command Prompt** from the Start menu and select **Run as administrator**, and then run the following command: - ```sh - mkdir "C:\Program Files (x86)\Intel" - ``` - > **NOTE**: `C:\Program Files (x86)\Intel` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the [OpenVINO Runtime archive file for Windows](https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/) to your local `Downloads` folder. - + .. code-block:: sh + + mkdir "C:\Program Files (x86)\Intel" + + + .. note:: + + ``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. + + +2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. 
+ If you prefer using command-lines, run the following commands in the command prompt window you opened: - ```sh - cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022.3.0.zip - ``` - > **NOTE**: A `.sha256` file is provided together with the archive file to validate your download process. To do that, download the `.sha256` file from the same repository and run `CertUtil -hashfile openvino_2022.3.0.zip SHA256`. Compare the returned value in the output with what's in the `.sha256` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket [here](https://www.intel.com/content/www/us/en/support/contact-intel.html). + + .. code-block:: sh + + cd /Downloads + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022. 3.0.zip -3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the `C:\Program Files (x86)\Intel` directory. - + .. note:: + + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2022.3.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. + + +3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. + To do this step using command-lines, run the following commands in the command prompt window you opened: - ```sh - tar -xf openvino_2022.3.0.zip - ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 - move openvino_2022.3.0 "C:\Program Files (x86)\Intel" - ``` + + .. code-block:: sh + + tar -xf openvino_2022.3.0.zip + ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 + move openvino_2022.3.0 "C:\Program Files (x86)\Intel" + 4. For simplicity, it is useful to create a symbolic link. Open a command prompt window as administrator (see Step 1 for how to do this) and run the following commands: - ```sh - cd C:\Program Files (x86)\Intel - mklink /D openvino_2022 openvino_2022.3.0 - ``` - > **NOTE**: If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. If you want to override it, nagivate to the `C:\Program Files (x86)\Intel` folder and delete the existing linked folder before running the `mklink` command. + .. code-block:: sh + + cd C:\Program Files (x86)\Intel + mklink /D openvino_2022 openvino_2022.3.0 -Congratulations, you finished the installation! The `C:\Program Files (x86)\Intel\openvino_2022` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. -### Step 2: Configure the Environment + .. note:: -You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the `setupvars.bat` batch file to temporarily set your environment variables. 
If your is not `C:\Program Files (x86)\Intel\openvino_2022`, use the correct directory instead.
+   If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. If you want to override it, navigate to the ``C:\Program Files (x86)\Intel`` folder and delete the existing linked folder before running the ``mklink`` command.
-```sh
-"C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat"
-```
-> **Important**: The above command must be re-run every time a new Command Prompt window is opened.
+Congratulations, you finished the installation! The ``C:\Program Files (x86)\Intel\openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred to as ```` throughout the OpenVINO documentation.
+
+.. _set-the-environment-variables-windows:
+
+Step 2: Configure the Environment
++++++++++++++++++++++++++++++++++
+
+You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the ``setupvars.bat`` batch file to temporarily set your environment variables. If your ```` is not ``C:\Program Files (x86)\Intel\openvino_2022``, use the correct directory instead.
+
+.. code-block:: sh
+
+   "C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat"
+
+
+.. important::
+
+   The above command must be re-run every time a new Command Prompt window is opened.
+
+
+.. note::
+
+   If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described `here `__). Check your system environment variables, and add Python if necessary.
-> **NOTE**: If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described [here](https://docs.python.org/3/using/windows.html#finding-the-python-executable)). Check your system environment variables, and add Python if necessary.
 The environment variables are set. Continue to the next section if you want to download any additional components.
-### Step 3 (Optional): Install Additional Components
+.. _model-optimizer-windows:
+
+Step 3 (Optional): Install Additional Components
+++++++++++++++++++++++++++++++++++++++++++++++++
 OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately.
-See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions.
+See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions.
-OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
+OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub `__.
+
+.. 
_optional-steps-windows: + +Step 4 (Optional): Configure Inference on non-CPU Devices ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Step 4 (Optional): Configure Inference on non-CPU Devices OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. See the instructions below to set up OpenVINO on these devices. -@sphinxdirective .. tab:: GPU To enable the toolkit components to use processor graphics (GPU) on your system, follow the steps in :ref:`GPU Setup Guide `. @@ -114,69 +160,67 @@ OpenVINO Runtime has a plugin architecture that enables you to run inference on .. tab:: GNA To enable the toolkit components to use Intel® Gaussian & Neural Accelerator (GNA) on your system, follow the steps in :ref:`GNA Setup Guide `. - -@endsphinxdirective -## What's Next? + +.. _get-started-windows: + +What's Next? +#################### + Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. tab:: Get started with Python - Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + Try the `Python Quick Start Example `__ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - - * `OpenVINO Python API Tutorial `_ - * `Basic image classification program with Hello Image Classification `_ - * `Convert a PyTorch model and use it for image background removal `_ + + * `OpenVINO Python API Tutorial `__ + * `Basic image classification program with Hello Image Classification `__ + * `Convert a PyTorch model and use it for image background removal `__ .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling OpenVINO Runtime -To uninstall OpenVINO, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +.. 
_uninstall-from-windows: -## Additional Resources +Uninstalling OpenVINO Runtime +############################# -## Additional Resources +To uninstall OpenVINO, follow the steps on the :doc:`Uninstalling page ` -@sphinxdirective +Additional Resources +#################### +* `OpenVINO Installation Selector Tool `__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer Developer Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ - - -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos-header.md b/docs/install_guides/installing-openvino-macos-header.md index 7054d0c6e0dbc3..69b1e93df437c3 100644 --- a/docs/install_guides/installing-openvino-macos-header.md +++ b/docs/install_guides/installing-openvino-macos-header.md @@ -10,12 +10,13 @@ From PyPI Using HomeBrew -@endsphinxdirective +If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. We prepared following options for you: -If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. We prepared following options for you: +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool `__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/installing-openvino-macos.md index 9a98f9335e1ded..21c5053e082821 100644 --- a/docs/install_guides/installing-openvino-macos.md +++ b/docs/install_guides/installing-openvino-macos.md @@ -1,12 +1,16 @@ # Install OpenVINO™ Runtime for macOS from Installer +@sphinxdirective + Currently only the following ways are provided to install OpenVINO™: -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Build From Source](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build.md) +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` +* `Build From Source `__ The other installation methods are temporarily unavailable. 
For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) + +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-raspbian.md b/docs/install_guides/installing-openvino-raspbian.md index 8e87595aca6022..d47cea800092e6 100644 --- a/docs/install_guides/installing-openvino-raspbian.md +++ b/docs/install_guides/installing-openvino-raspbian.md @@ -8,12 +8,8 @@ * These steps have been validated with Raspberry Pi 3. * There is also an open-source version of OpenVINO™ that can be compiled for arch64 (see `build instructions `_). -@endsphinxdirective - - -## Development and Target Systems - -@sphinxdirective +Development and Target Systems +############################### .. tab:: System Requirements @@ -28,12 +24,8 @@ .. _install-openvino: -@endsphinxdirective - - -## Step 1: Download and Install OpenVINO Runtime - -@sphinxdirective +Step 1: Download and Install OpenVINO Runtime +############################################# #. Open the Terminal or your preferred console application. #. Create an installation folder for OpenVINO. If the folder already exists, skip this step. @@ -99,12 +91,8 @@ Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` .. _install-external-dependencies: -@endsphinxdirective - - -## Step 2: Install External Software Dependencies - -@sphinxdirective +Step 2: Install External Software Dependencies +############################################## CMake version 3.10 or higher is required for building the OpenVINO™ toolkit sample application. To install, open a Terminal window and run the following command: @@ -117,12 +105,8 @@ CMake is installed. Continue to the next section to set the environment variable .. _set-the-environment-variables-raspbian: -@endsphinxdirective - - -## Step 3: Set the Environment Variables - -@sphinxdirective +Step 3: Set the Environment Variables +##################################### You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your is not ``/opt/intel/openvino_2022``, use the correct one instead. @@ -141,12 +125,8 @@ The environment variables are set. Continue to the next section if you want to d .. _model-optimizer: -@endsphinxdirective - - -## Step 4 (Optional): Install Additional Components - -@sphinxdirective +Step 4 (Optional): Install Additional Components +################################################ If you want to use your model for inference, the model must be converted to the ``.bin`` and ``.xml`` Intermediate Representation (IR) files that are used as input by OpenVINO Runtime. To get the optimized models, you can use one of the following options: @@ -158,16 +138,11 @@ If you want to use your model for inference, the model must be converted to the * OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. -@endsphinxdirective - - -## What's Next? - -@sphinxdirective +What's Next? +#################### Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! 
Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. - .. tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. @@ -181,6 +156,7 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ + .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. @@ -193,24 +169,17 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ - To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -@endsphinxdirective - - -## Additional Resources - -@sphinxdirective +Additional Resources +#################### * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ * :ref:`OpenVINO Installation Selector Tool ` -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit - -@endsphinxdirective \ No newline at end of file +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-windows-header.md b/docs/install_guides/installing-openvino-windows-header.md index 2b0489fd384379..f5480a6a350f3a 100644 --- a/docs/install_guides/installing-openvino-windows-header.md +++ b/docs/install_guides/installing-openvino-windows-header.md @@ -10,12 +10,14 @@ From PyPI Using Docker -@endsphinxdirective -If you want to install OpenVINO™ Runtime on Windows, you have the following options: +If you want to install OpenVINO™ Runtime on Windows, you have the following options: + +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO from PyPI ` +* :doc:`Install OpenVINO with Docker ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-windows.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Install OpenVINO with Docker](installing-openvino-docker-windows.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool `__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-yocto.md b/docs/install_guides/installing-openvino-yocto.md index 2aa69d56d3db52..479d03f2e87c49 100644 --- a/docs/install_guides/installing-openvino-yocto.md +++ b/docs/install_guides/installing-openvino-yocto.md @@ -116,7 +116,7 @@ If the image build is successful, it will return the list of packages as below: Additional Resources #################### -- :ref:`Troubleshooting Guide ` +- :ref:`Troubleshooting Guide ` - 
`Yocto Project `__ - official documentation webpage
- `BitBake Tool `__
- `Poky `__
diff --git a/docs/install_guides/troubleshooting-issues.md b/docs/install_guides/troubleshooting-issues.md
index fd539d6ea64845..a381f0f46c1517 100644
--- a/docs/install_guides/troubleshooting-issues.md
+++ b/docs/install_guides/troubleshooting-issues.md
@@ -1,52 +1,71 @@
 # Issues & Solutions for OpenVINO™ Installation & Configuration {#openvino_docs_get_started_guide_troubleshooting_issues}
+@sphinxdirective
+
 This page lists issues that you may encounter during the installation and configuration of OpenVINO™, as well as their possible solutions.
-## Errors with Installing via PIP for Users in China
+.. _install_for_prc:
+
+Errors with Installing via PIP for Users in China
+#################################################
 Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try one of the following options:
 * Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example:
-   ``` sh
-   pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/
-   ```
-   Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``.
-   You can also run the following command to install specific framework. For example:
+   .. code-block:: sh
+
+      pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/
+
+   Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``.
+   You can also run the following command to install a specific framework. For example:
+
+   .. code-block:: sh
+
+      pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/
-   ```
-   pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/
-   ```
 * For C++ developers, if you have installed OpenVINO Runtime via APT, YUM, or the archive file, and then installed OpenVINO Development Tools via PyPI, you may run into issues. To resolve that, install the components in ``requirements.txt`` by using the following command:
-   ``` sh
-   pip install -r /tools/requirements.txt
-   ```
-   For APT and YUM users, replace the `INSTALL_DIR` with `/usr/share/openvino`.
+
+   .. code-block:: sh
+
+      pip install -r /tools/requirements.txt
+
+   For APT and YUM users, replace the ``INSTALL_DIR`` with ``/usr/share/openvino``.
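+
+Whichever mirror you use, a quick way to confirm that the installation succeeded in the active environment is to import the packages from Python. This is an illustrative check rather than an official step; the ``openvino.tools.mo`` module name below is an assumption about how the ``openvino-dev`` package is laid out.
+
+.. code-block:: python
+
+   # Confirm that the pip installation is usable from the active environment.
+   import importlib.util
+
+   from openvino.runtime import get_version
+
+   print("OpenVINO Runtime version:", get_version())
+
+   # openvino-dev is expected to provide the Model Optimizer package;
+   # the module name below is an assumption about the package layout.
+   mo_spec = importlib.util.find_spec("openvino.tools.mo")
+   print("Model Optimizer package found:", mo_spec is not None)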
-## Issues with Installing OpenVINO on Linux from Docker
+Issues with Installing OpenVINO on Linux from Docker
+####################################################
+
+.. _proxy-issues:
-### Proxy Issues
+Proxy Issues
+++++++++++++
-If you met proxy issues during the installation with Docker, you need set up proxy settings for Docker. See the [Docker guide](https://docs.docker.com/network/proxy/) for more details.
+If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the `Docker guide `__ for more details.
+.. _yocto_install_issues:
-@anchor yocto-install-issues
-## Issues with Creating a Yocto Image for OpenVINO
+Issues with Creating a Yocto Image for OpenVINO
+###############################################
-### Error while adding "meta-intel" layer
+Error while adding "meta-intel" layer
++++++++++++++++++++++++++++++++++++++
-When using the `bitbake-layers add-layer meta-intel` command, the following error might occur:
-```sh
-NOTE: Starting bitbake server...
-ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd
-```
+When using the ``bitbake-layers add-layer meta-intel`` command, the following error might occur:
-To resolve the issue, install the `chrpath diffstat zstd` tools:
+.. code-block:: sh
+
+   NOTE: Starting bitbake server...
+   ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd
+
+
+To resolve the issue, install the ``chrpath diffstat zstd`` tools:
+
+.. code-block:: sh
+
+   sudo apt-get install chrpath diffstat zstd
-```sh
-sudo apt-get install chrpath diffstat zstd
-```
+@endsphinxdirective
diff --git a/docs/install_guides/troubleshooting-steps.md b/docs/install_guides/troubleshooting-steps.md
index ec4049f445325b..fd5f03c73b21d2 100644
--- a/docs/install_guides/troubleshooting-steps.md
+++ b/docs/install_guides/troubleshooting-steps.md
@@ -1,56 +1,69 @@
 # Troubleshooting Steps for OpenVINO™ Installation and Configurations {#openvino_docs_get_started_guide_troubleshooting_steps}
+@sphinxdirective
+
 If you run into issues while installing or configuring OpenVINO™, you can try the following methods to do some quick checks first.
-## Check the versions of OpenVINO Runtime and Developement Tools
+Check the versions of OpenVINO Runtime and Development Tools
+#############################################################
 * To check the version of OpenVINO Development Tools, use the following command:
-   ```sh
-   mo --version
-   ```
+
+   .. code-block:: sh
+
+      mo --version
+
 * To check the version of OpenVINO Runtime, use the following code:
-   ```sh
-   from openvino.runtime import get_version get_version()
-   ```
+
+   .. code-block:: python
+
+      from openvino.runtime import get_version
+      get_version()
+
-## Check the versions of Python and PIP
+Check the versions of Python and PIP
+####################################
-To check your Python version, run `python -VV` or `python --version`. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, you are recommended to upgrade the version to 3.7 or higher.
+To check your Python version, run ``python -VV`` or ``python --version``. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, it is recommended to upgrade to 3.7 or higher.
 If your Python version does not meet the requirements, update Python:
-* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from .
-* For Linux and macOS systems, download and install a proper Python version from . See the [Python Beginners' Guide](https://wiki.python.org/moin/BeginnersGuide/Download) for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website.
+* For Windows, **do not install Python from a Windows Store** as it can cause issues. It is highly recommended to install Python from the `official website `__.
+* For Linux and macOS systems, download and install a proper Python version from the `official website `__. See the `Python Beginners' Guide `__ for more information on selecting a version. Note that macOS 10.x comes with Python 2.7 installed, which is not supported, so you must install Python from the official website.
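+
+If you are not sure which interpreter a plain ``python`` command resolves to, the short script below prints the version and whether it is a 64-bit build. It is an illustrative helper rather than part of the official steps; the 3.7 - 3.10 range is taken from the requirement above.
+
+.. code-block:: python
+
+   # Report the interpreter version and whether it is a 64-bit build.
+   import struct
+   import sys
+
+   version = sys.version_info[:2]
+   print("Python version:", sys.version.split()[0])
+   print("In supported range (3.7 - 3.10):", (3, 7) <= version <= (3, 10))
+   # 8 bytes per pointer means a 64-bit interpreter.
+   print("64-bit interpreter:", struct.calcsize("P") * 8 == 64)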
For PIP, make sure that you have installed the latest version. To check and upgrade your PIP version, run the following command: -```sh -python -m pip install --upgrade pip -``` + +.. code-block:: sh + + python -m pip install --upgrade pip - -## Check if required external dependencies are installed (for pre-2022.2 releases) +Check if required external dependencies are installed (for pre-2022.2 releases) +############################################################################### For OpenVINO releases prior to 2022.2: -- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to [install OpenVINO Development Tools](installing-model-dev-tools.md), make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. -- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its [official support page](https://docs.microsoft.com/en-us/visualstudio/install/modify-visual-studio?view=vs-2019). -## Check if environment variables are set correctly +- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to :doc:`install OpenVINO Development Tools `, make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. +- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its `official support page `__ . -- For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with `setupvars` from `.bashrc`. Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. -- If you have installed OpenVINO before, you probably have added `setupvars` to your `PATH /.bashrc` or Windows environment variables. After restarting your environment, you should see similar information as below: -```sh -[setupvars.sh] OpenVINO™ environment initialized -``` - - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct or you are trying to activate in the correct directory. - - If you added it to a `.bashrc` file, make sure that the command is correctly written and the file is found in the `~/.bashrc` folder. 
+Check if environment variables are set correctly +################################################ -## Verify that OpenVINO is correctly installed +- For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with ``setupvars`` from ``.bashrc``. Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. +- If you have installed OpenVINO before, you probably have added ``setupvars`` to your ``PATH /.bashrc`` or Windows environment variables. After restarting your environment, you should see similar information as below: -@sphinxdirective + .. code-block:: sh + + [setupvars.sh] OpenVINO™ environment initialized + + + - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct or you are trying to activate in the correct directory. + - If you added it to a ``.bashrc`` file, make sure that the command is correctly written and the file is found in the ``~/.bashrc`` folder. + +Verify that OpenVINO is correctly installed +########################################### * For Python developers, to verify if OpenVINO is correctly installed, use the following command: @@ -66,24 +79,24 @@ For OpenVINO releases prior to 2022.2: * If you installed OpenVINO Runtime from YUM, use the ``yum list installed 'openvino*'`` command to list the installed OpenVINO packages. -@endsphinxdirective - -## Check if GPU drvier is installed +Check if GPU drvier is installed +################################ -[Additional configurations](configurations-header.md) may be required in order to use OpenVINO with different hardware such as Intel® GPUs. +:doc:`Additional configurations ` may be required in order to use OpenVINO with different hardware such as Intel® GPUs. -To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see [additional configurations for GPU](configurations-for-intel-gpu.md). +To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see :doc:`additional configurations for GPU `. -## Check firewall and network settings +Check firewall and network settings +################################### Make sure that your firewall and network settings are configured correctly. For example, consider configuring system-wide proxy settings and specifying options for using PIP behind the proxy: -@sphinxdirective +.. code-block:: sh + + pip install --proxy http://address:port --trusted-host pypi.org openvino - .. code-block:: sh - pip install --proxy http://address:port --trusted-host pypi.org openvino +For specific issues, see :ref:`Errors with Installing via PIP for Users in China ` and :ref:`proxy issues with installing OpenVINO on Linux from Docker `. @endsphinxdirective -For specific issues, see Errors with Installing via PIP for Users in China and proxy issues with installing OpenVINO on Linux from Docker. \ No newline at end of file diff --git a/docs/install_guides/troubleshooting.md b/docs/install_guides/troubleshooting.md index 9963a579978025..99e3fd7ca8ea97 100644 --- a/docs/install_guides/troubleshooting.md +++ b/docs/install_guides/troubleshooting.md @@ -9,16 +9,14 @@ Issues & Solutions Troubleshooting Steps -@endsphinxdirective - -@sphinxdirective .. 
_troubleshooting guide for install: -@endsphinxdirective - This guide provides general troubleshooting steps and solutions to possible issues that can be encountered while installing and configuring OpenVINO™. -The [Issues & Solutions](./troubleshooting-issues.md) page lists common installation and configuration errors, and their possible solutions. If you encountered a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. +The :doc:`Issues & Solutions ` page lists common installation and configuration errors, and their possible solutions. If you encountered a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. + +The :doc:`Troubleshooting Steps ` page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. + +@endsphinxdirective -The [Troubleshooting Steps](./troubleshooting-steps.md) page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 06a1ad6a403f2e..d633d63d1e578e 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -8,7 +8,7 @@ repo_name = "openvino_notebooks" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20230309220806/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20230317115622/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] diff --git a/docs/optimization_guide/dldt_deployment_optimization_common.md b/docs/optimization_guide/dldt_deployment_optimization_common.md index 6438f87b7e9d0a..9307f5db945d05 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_common.md +++ b/docs/optimization_guide/dldt_deployment_optimization_common.md @@ -1,64 +1,92 @@ # General Optimizations {#openvino_docs_deployment_optimization_guide_common} +@sphinxdirective + This article covers application-level optimization techniques, such as asynchronous execution, to improve data pipelining, pre-processing acceleration and so on. While the techniques (e.g. pre-processing) can be specific to end-user applications, the associated performance improvements are general and shall improve any target scenario -- both latency and throughput. -@anchor inputs_pre_processing -## Inputs Pre-Processing with OpenVINO +.. _inputs_pre_processing: + +Inputs Pre-Processing with OpenVINO +################################### In many cases, a network expects a pre-processed image. It is advised not to perform any unnecessary steps in the code: -- Model Optimizer can efficiently incorporate the mean and normalization (scale) values into a model (for example, to the weights of the first convolution). For more details, see the [relevant Model Optimizer command-line options](../MO_DG/prepare_model/Additional_Optimizations.md). -- Let OpenVINO accelerate other means of [Image Pre-processing and Conversion](../OV_Runtime_UG/preprocessing_overview.md). -- Data which is already in the "on-device" memory can be input directly by using the [remote tensors API of the GPU Plugin](../OV_Runtime_UG//supported_plugins/GPU_RemoteTensor_API.md). 
-@anchor async_api -## Prefer OpenVINO Async API -The API of the inference requests offers Sync and Async execution. While the `ov::InferRequest::infer()` is inherently synchronous and executes immediately (effectively serializing the execution flow in the current application thread), the Async "splits" the `infer()` into `ov::InferRequest::start_async()` and `ov::InferRequest::wait()`. For more information, see the [API examples](../OV_Runtime_UG/ov_infer_request.md). +* Model Optimizer can efficiently incorporate the mean and normalization (scale) values into a model (for example, to the weights of the first convolution). For more details, see the :doc:`relevant Model Optimizer command-line options `. +* Let OpenVINO accelerate other means of :doc:`Image Pre-processing and Conversion ` +* Data which is already in the "on-device" memory can be input directly by using the :doc:`remote tensors API of the GPU Plugin `. + +.. _async_api: + +Prefer OpenVINO Async API +######################### -A typical use case for the `ov::InferRequest::infer()` is running a dedicated application thread per source of inputs (e.g. a camera), so that every step (frame capture, processing, parsing the results, and associated logic) is kept serial within the thread. -In contrast, the `ov::InferRequest::start_async()` and `ov::InferRequest::wait()` allow the application to continue its activities and poll or wait for the inference completion when really needed. Therefore, one reason for using an asynchronous code is "efficiency". +The API of the inference requests offers Sync and Async execution. While the `ov::InferRequest::infer() `__ is inherently synchronous and executes immediately (effectively serializing the execution flow in the current application thread), the Async "splits" the ``infer()`` into ``ov::InferRequest::start_async()`` and ``ov::InferRequest::wait()``. For more information, see the :doc:`API examples `. + +A typical use case for the ``ov::InferRequest::infer()`` is running a dedicated application thread per source of inputs (e.g. a camera), so that every step (frame capture, processing, parsing the results, and associated logic) is kept serial within the thread. +In contrast, the ``ov::InferRequest::start_async()`` and ``ov::InferRequest::wait()`` allow the application to continue its activities and poll or wait for the inference completion when really needed. Therefore, one reason for using an asynchronous code is "efficiency". + +.. note:: + + Although the Synchronous API can be somewhat easier to start with, prefer to use the Asynchronous (callbacks-based, below) API in the production code. The reason is that it is the most general and scalable way to implement the flow control for any possible number of requests (and hence both latency and throughput scenarios). -> **NOTE**: Although the Synchronous API can be somewhat easier to start with, prefer to use the Asynchronous (callbacks-based, below) API in the production code. The reason is that it is the most general and scalable way to implement the flow control for any possible number of requests (and hence both latency and throughput scenarios). The key advantage of the Async approach is that when a device is busy with the inference, the application can do other things in parallel (e.g. populating inputs or scheduling other requests) rather than wait for the current inference to complete first. In the example below, inference is applied to the results of the video decoding. 
It is possible to keep two parallel infer requests, and while the current one is processed, the input frame for the next one is being captured. This essentially hides the latency of capturing, so that the overall frame rate is determined only by the slowest part of the pipeline (decoding vs inference) and not by the sum of the stages. -![Intel® VTune™ screenshot](../img/synch-vs-asynch.svg) +.. image:: _static/images/synch-vs-asynch.svg + :alt: Intel® VTune™ screenshot Below are code examples comparing the regular and async-based approaches: -- Normally, the frame is captured with OpenCV and then immediately processed:
- @snippet snippets/dldt_optimization_guide8.cpp part8 +* Normally, the frame is captured with OpenCV and then immediately processed:
+ + .. doxygensnippet:: docs/snippets/dldt_optimization_guide8.cpp + :language: cpp + :fragment: [part8] + +* In the "true" async mode, the ``NEXT`` request is populated in the main (application) thread, while the ``CURRENT`` request is processed:
-- In the "true" async mode, the `NEXT` request is populated in the main (application) thread, while the `CURRENT` request is processed:
- @snippet snippets/dldt_optimization_guide9.cpp part9 + .. doxygensnippet:: docs/snippets/dldt_optimization_guide9.cpp + :language: cpp + :fragment: [part9] The technique can be generalized to any available parallel slack. For example, you can do inference and simultaneously encode the resulting or previous frames or run further inference, like emotion detection on top of the face detection results. -Refer to the [Object Detection С++ Demo](@ref omz_demos_object_detection_demo_cpp), [Object Detection Python Demo](@ref omz_demos_object_detection_demo_python)(latency-oriented Async API showcase) and [Benchmark App Sample](../../samples/cpp/benchmark_app/README.md) for complete examples of the Async API in action. +Refer to the `Object Detection C++ Demo `__, `Object Detection Python Demo `__ (latency-oriented Async API showcase) and :doc:`Benchmark App Sample ` for complete examples of the Async API in action. -> **NOTE**: Using the Asynchronous API is a must for [throughput-oriented scenarios](./dldt_deployment_optimization_tput.md). +.. note:: -### Notes on Callbacks -Keep in mind that the `ov::InferRequest::wait()` of the Async API waits for the specific request only. However, running multiple inference requests in parallel provides no guarantees on the completion order. This may complicate a possible logic based on the `ov::InferRequest::wait`. The most scalable approach is using callbacks (set via the `ov::InferRequest::set_callback`) that are executed upon completion of the request. The callback functions will be used by OpenVINO Runtime to notify you of the results (or errors). + Using the Asynchronous API is a must for :doc:`throughput-oriented scenarios `. + +Notes on Callbacks +++++++++++++++++++++ + +Keep in mind that the ``ov::InferRequest::wait()`` of the Async API waits for the specific request only. However, running multiple inference requests in parallel provides no guarantees on the completion order. This may complicate a possible logic based on the ``ov::InferRequest::wait``. The most scalable approach is using callbacks (set via the ``ov::InferRequest::set_callback``) that are executed upon completion of the request. The callback functions will be used by OpenVINO Runtime to notify you of the results (or errors). This is a more event-driven approach. A few important points on the callbacks: -- It is the job of the application to ensure that any callback function is thread-safe. -- Although executed asynchronously by a dedicated threads, the callbacks should NOT include heavy operations (e.g. I/O) and/or blocking calls. Work done by any callback should be kept to a minimum. -@anchor tensor_idiom -## The "get_tensor" Idiom +* It is the job of the application to ensure that any callback function is thread-safe. +* Although executed asynchronously by a dedicated threads, the callbacks should NOT include heavy operations (e.g. I/O) and/or blocking calls. Work done by any callback should be kept to a minimum. + +.. _tensor_idiom: + +The "get_tensor" Idiom +###################### + Each device within OpenVINO may have different internal requirements on the memory padding, alignment, etc., for intermediate tensors. The **input/output tensors** are also accessible by the application code. -As every `ov::InferRequest` is created by the particular instance of the `ov::CompiledModel`(that is already device-specific) the requirements are respected and the input/output tensors of the requests are still device-friendly. 
+As every ``ov::InferRequest`` is created by the particular instance of the ``ov::CompiledModel`` (that is already device-specific) the requirements are respected and the input/output tensors of the requests are still device-friendly. To sum it up: -* The `get_tensor` (that offers the `data()` method to get a system-memory pointer to the content of a tensor), is a recommended way to populate the inference inputs (and read back the outputs) **from/to the host memory**: - * For example, for the GPU device, the **input/output tensors** are mapped to the host (which is fast) only when the `get_tensor` is used, while for the `set_tensor` a copy into the internal GPU structures may happen. -* In contrast, when the input tensors are already in the **on-device memory** (e.g. as a result of the video-decoding), prefer the `set_tensor` as a zero-copy way to proceed. For more details, see the [GPU device Remote tensors API](../OV_Runtime_UG//supported_plugins/GPU_RemoteTensor_API.md). -@sphinxdirective +* The ``get_tensor`` (that offers the ``data()`` method to get a system-memory pointer to the content of a tensor), is a recommended way to populate the inference inputs (and read back the outputs) **from/to the host memory**: + + * For example, for the GPU device, the **input/output tensors** are mapped to the host (which is fast) only when the ``get_tensor`` is used, while for the ``set_tensor`` a copy into the internal GPU structures may happen. + +* In contrast, when the input tensors are already in the **on-device memory** (e.g. as a result of the video-decoding), prefer the ``set_tensor`` as a zero-copy way to proceed. For more details, see the :doc:`GPU device Remote tensors API `. + -Consider the :ref:`API examples ` for the `get_tensor` and `set_tensor`. +Consider the :ref:`API examples ` for the ``get_tensor`` and ``set_tensor``. @endsphinxdirective diff --git a/docs/optimization_guide/dldt_deployment_optimization_guide.md b/docs/optimization_guide/dldt_deployment_optimization_guide.md index ac42c884d5080c..9ae41edc37a0dc 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_guide.md +++ b/docs/optimization_guide/dldt_deployment_optimization_guide.md @@ -14,55 +14,51 @@ openvino_docs_OV_UG_Preprocessing_Overview openvino_docs_deployment_optimization_guide_internals -@endsphinxdirective Runtime optimization, or deployment optimization, focuses on tuning inference parameters and execution means (e.g., the optimum number of requests executed simultaneously). Unlike model-level optimizations, they are highly specific to the hardware and case they are used for, and often come at a cost. -`ov::inference_precision` is a "typical runtime configuration" which trades accuracy for performance, allowing `fp16/bf16` execution for the layers that remain in `fp32` after quantization of the original `fp32` model. +`ov::inference_precision `__ is a "typical runtime configuration" which trades accuracy for performance, allowing ``fp16/bf16`` execution for the layers that remain in ``fp32`` after quantization of the original ``fp32`` model. Therefore, optimization should start with defining the use case. For example, if it is about processing millions of samples by overnight jobs in data centers, throughput could be prioritized over latency. On the other hand, real-time usages would likely trade off throughput to deliver the results at minimal latency. A combined scenario is also possible, targeting the highest possible throughput, while maintaining a specific latency threshold. 
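As a rough illustration of how that use-case choice is expressed in code, the sketch below uses the Python API and the high-level hints discussed later in this guide; the model path is a placeholder and the property values are only examples:

.. code-block:: py

   from openvino.runtime import Core

   core = Core()
   model = core.read_model("model.xml")  # placeholder path

   # Latency-oriented preset: serve a single request with minimal delay.
   latency_compiled = core.compile_model(model, "CPU", {"PERFORMANCE_HINT": "LATENCY"})

   # Throughput-oriented preset: the device picks streams and batching internally.
   # INFERENCE_PRECISION_HINT is the accuracy/performance trade-off mentioned above.
   throughput_compiled = core.compile_model(
       model, "CPU", {"PERFORMANCE_HINT": "THROUGHPUT", "INFERENCE_PRECISION_HINT": "f32"})

   # The compiled model reports how many parallel requests saturate the device.
   print(throughput_compiled.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS"))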
It is also important to understand how the full-stack application would use the inference component "end-to-end." For example, to know what stages need to be orchestrated to save workload devoted to fetching and preparing input data. - - - For more information on this topic, see the following articles: -@sphinxdirective * :ref:`feature support by device ` -@endsphinxdirective +* :ref:`Inputs Pre-processing with the OpenVINO ` +* :ref:`Async API ` +* :ref:`The 'get_tensor' Idiom ` +* For variably-sized inputs, consider :doc:`dynamic shapes ` -* [Inputs Pre-processing with the OpenVINO](@ref inputs_pre_processing) -* [Async API](@ref async_api) -* [The 'get_tensor' Idiom](@ref tensor_idiom) -* For variably-sized inputs, consider [dynamic shapes](../OV_Runtime_UG/ov_dynamic_shapes.md) +See the :doc:`latency ` and :doc:`throughput ` optimization guides, for **use-case-specific optimizations** +Writing Performance-Portable Inference Applications +################################################### - - -See the [latency](./dldt_deployment_optimization_latency.md) and [throughput](./dldt_deployment_optimization_tput.md) optimization guides, for **use-case-specific optimizations** - -## Writing Performance-Portable Inference Applications Although inference performed in OpenVINO Runtime can be configured with a multitude of low-level performance settings, it is not recommended in most cases. Firstly, achieving the best performance with such adjustments requires deep understanding of device architecture and the inference engine. Secondly, such optimization may not translate well to other device-model combinations. In other words, one set of execution parameters is likely to result in different performance when used under different conditions. For example: - * both the CPU and GPU support the notion of [streams](./dldt_deployment_optimization_tput_advanced.md), yet they deduce their optimal number very differently. - * Even among devices of the same type, different execution configurations can be considered optimal, as in the case of instruction sets or the number of cores for the CPU and the batch size for the GPU. - * Different models have different optimal parameter configurations, considering factors such as compute vs memory-bandwidth, inference precision, and possible model quantization. - * Execution "scheduling" impacts performance strongly and is highly device-specific, for example, GPU-oriented optimizations like batching, combining multiple inputs to achieve the optimal throughput, [do not always map well to the CPU](dldt_deployment_optimization_internals.md). +* both the CPU and GPU support the notion of :ref:`streams `, yet they deduce their optimal number very differently. +* Even among devices of the same type, different execution configurations can be considered optimal, as in the case of instruction sets or the number of cores for the CPU and the batch size for the GPU. +* Different models have different optimal parameter configurations, considering factors such as compute vs memory-bandwidth, inference precision, and possible model quantization. +* Execution "scheduling" impacts performance strongly and is highly device-specific, for example, GPU-oriented optimizations like batching, combining multiple inputs to achieve the optimal throughput, :doc:`do not always map well to the CPU `. -To make the configuration process much easier and its performance optimization more portable, the option of [Performance Hints](../OV_Runtime_UG/performance_hints.md) has been introduced. 
It comprises two high-level "presets" focused on either **latency** or **throughput** and, essentially, makes execution specifics irrelevant. + +To make the configuration process much easier and its performance optimization more portable, the option of :doc:`Performance Hints ` has been introduced. It comprises two high-level "presets" focused on either **latency** or **throughput** and, essentially, makes execution specifics irrelevant. The Performance Hints functionality makes configuration transparent to the application, for example, anticipates the need for explicit (application-side) batching or streams, and facilitates parallel processing of separate infer requests for different input sources -## Additional Resources +Additional Resources +#################### + +* :ref:`Using Async API and running multiple inference requests in parallel to leverage throughput `. +* :doc:`The throughput approach implementation details for specific devices ` +* :doc:`Details on throughput ` +* :doc:`Details on latency ` +* :doc:`API examples and details ` -* [Using Async API and running multiple inference requests in parallel to leverage throughput](@ref throughput_app_design). -* [The throughput approach implementation details for specific devices](dldt_deployment_optimization_internals.md) -* [Details on throughput](dldt_deployment_optimization_tput.md) -* [Details on latency](dldt_deployment_optimization_latency.md) -* [API examples and details](../OV_Runtime_UG/performance_hints.md). +@endsphinxdirective diff --git a/docs/optimization_guide/dldt_deployment_optimization_internals.md b/docs/optimization_guide/dldt_deployment_optimization_internals.md index b03742d351f180..ab596e49c98e0b 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_internals.md +++ b/docs/optimization_guide/dldt_deployment_optimization_internals.md @@ -1,168 +1,70 @@ # Further Low-Level Implementation Details {#openvino_docs_deployment_optimization_guide_internals} -## Throughput on the CPU: Internals -As explained in the [throughput-related section](./dldt_deployment_optimization_tput.md), the OpenVINO streams are means of running multiple requests in parallel. -In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. -This provides much better performance for the networks than batching, especially for the multiple-core systems: - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Conventional Approach** - - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. - | A lot of synchronization between many threads results in overhead. - | An only option to improve efficiency is batching. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams (each 1-4 threads). - | Less threads per stream means less synchronization, better locality, and finer granularity. - -@endsphinxdirective - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_conventional_approach.svg) @sphinxdirective -.. raw:: html +Throughput on the CPU: Internals +################################ -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_streams.svg) - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - .. raw:: html +As explained in the :doc:`throughput-related section `, the OpenVINO streams are means of running multiple requests in parallel. +In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. +This provides much better performance for the networks than batching, especially for the multiple-core systems: -
+.. list-table:: + :header-rows: 1 - .. container:: column-two-col-content + * - Conventional Approach + - Streams + * - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. + | A lot of synchronization between many threads results in overhead. + | An only option to improve efficiency is batching. + - | CPU cores are evenly distributed between execution streams (each 1-4 threads). + | Less threads per stream means less synchronization, better locality, and finer granularity. + * - |conventional-approach| + - | |execution-streams| + | Requests are executed in parallel with a small number of threads. + | Layer-wise, the streams imply much less synchronization. - | Requests are executed in parallel with a small number of threads. - | **Layer-wise, the streams imply much less synchronization.** - -@endsphinxdirective +.. |conventional-approach| image:: _static/images/cpu_execution_conventional_approach.svg +.. |execution-streams| image:: _static/images/cpu_execution_streams.svg Compared to the batching, the parallelism is somewhat transposed (performed over inputs with much less synchronization within CNN ops): -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Large Batch Approach** - - | All threads process all inputs at once. - | Assumes all layers are parallelized well. - | "Fat" requests are executed one by one. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams. - | "Parallelize the outermost loop" rule of thumb. - | Individual requests are executed in parallel. - -@endsphinxdirective - - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/large_batch_approach.svg) - -@sphinxdirective - -.. raw:: html +.. list-table:: + :header-rows: 1 -
-
+ * - Large Batch Approach + - Streams + * - | All threads process all inputs at once. + | Assumes all layers are parallelized well. + | “Fat” requests are executed one by one. + - | CPU cores are evenly distributed between execution streams. + | “Parallelize the outermost loop” rule of thumb. + | Individual requests are executed in parallel. + * - |large-batch-approach| + - | |execution-streams-2| + | Inputs-wise the streams are the “transposed” batch. -@endsphinxdirective - - -![](../img/cpu_execution_streams_2.svg) - -@sphinxdirective - -.. raw:: html +.. |large-batch-approach| image:: _static/images/large_batch_approach.svg -
-
- -@endsphinxdirective - - -@sphinxdirective +.. |execution-streams-2| image:: _static/images/cpu_execution_streams_2.svg -.. container:: row-two-col-content - .. container:: column-two-col-content +Keep in mind that :doc:`high-level performance hints ` allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including :doc:`int8 inference ` hardware acceleration, number of cores, etc. - .. raw:: html +Automatic Batching Internals +############################ -
- - .. container:: column-two-col-content - - **Inputs-wise the streams are the “transposed” batch.** - -@endsphinxdirective - - -Keep in mind that [high-level performance hints](../OV_Runtime_UG/performance_hints.md) allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including [int8 inference](@ref openvino_docs_model_optimization_guide) hardware acceleration, number of cores, etc. - -## Automatic Batching Internals -[Automatic batching](../OV_Runtime_UG/automatic_batching.md) performs on-the-fly grouping of inference requests to improve device utilization. +:doc:`Automatic batching ` performs on-the-fly grouping of inference requests to improve device utilization. It relaxes the requirement for an application to saturate devices such as GPU by using a large batch "explicitly". It performs transparent input gathering from individual inference requests followed by the actual batched execution, with no programming effort from the user: -![](../img/batch_device.svg) + +.. image:: _static/images/batch_device.svg Essentially, Automatic Batching shifts asynchronicity from individual requests to groups of requests that constitute the batches. Furthermore, for the execution to be efficient, it is very important that the requests arrive timely, without causing a batching timeout. Normally, the timeout should never be hit. It is rather a graceful way to handle the application exit (when the inputs are not arriving anymore, so the full batch is not possible to collect). If a workload experiences timeouts, which lead to a drop in performance due to increased latency of every request, consider balancing its value against the batch size. For example, a smaller batch size and timeout value may yield better results than a large batch size coupled with a timeout value that cannot guarantee accommodating all the required requests. -Finally, following the `get_tensor` idiom section from the [general optimizations](./dldt_deployment_optimization_common.md) helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. +Finally, following the ``get_tensor`` idiom section from the :doc:`general optimizations ` helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. + +@endsphinxdirective diff --git a/docs/optimization_guide/dldt_deployment_optimization_latency.md b/docs/optimization_guide/dldt_deployment_optimization_latency.md index f3759d63b4a489..66a5818f7d0ea9 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_latency.md +++ b/docs/optimization_guide/dldt_deployment_optimization_latency.md @@ -8,29 +8,27 @@ openvino_docs_OV_UG_Model_caching_overview -@endsphinxdirective + A significant portion of deep learning use cases involve applications loading a single model and using a single input at a time, which is the of typical "consumer" scenario. -While an application can create more than one request if needed, for example to support [asynchronous inputs population](@ref async_api), its **inference performance depends on how many requests are being inferenced in parallel** on a device. 
+While an application can create more than one request if needed, for example to support :ref:`asynchronous inputs population `, its **inference performance depends on how many requests are being inferenced in parallel** on a device. Similarly, when multiple models are served on the same device, it is important whether the models are executed simultaneously or in a chain, for example, in the inference pipeline. As expected, the easiest way to achieve **low latency is by running only one inference at a time** on one device. Accordingly, any additional concurrency usually results in latency rising fast. However, some conventional "root" devices (i.e., CPU or GPU) can be in fact internally composed of several "sub-devices". In many cases, letting OpenVINO leverage the "sub-devices" transparently helps to improve application's throughput (e.g., serve multiple clients simultaneously) without degrading latency. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as there are NUMA nodes in the system. Similarly, a multi-tile GPU, which is essentially multiple GPUs in a single package, can deliver a multi-tile scalability with the number of inference requests, while preserving the single-tile latency. -Typically, human expertise is required to get more "throughput" out of the device, even in the inherently latency-oriented cases. OpenVINO can take this configuration burden via [high-level performance hints](../OV_Runtime_UG/performance_hints.md), the `ov::hint::PerformanceMode::LATENCY` specified for the `ov::hint::performance_mode` property for the `compile_model`. +Typically, human expertise is required to get more "throughput" out of the device, even in the inherently latency-oriented cases. OpenVINO can take this configuration burden via :doc:`high-level performance hints `, the `ov::hint::PerformanceMode::LATENCY `__ specified for the ``ov::hint::performance_mode`` property for the ``compile_model``. -> **NOTE**: [OpenVINO performance hints](../OV_Runtime_UG/performance_hints.md) is a recommended way for performance configuration, which is both device-agnostic and future-proof. +.. note:: + :doc:`OpenVINO performance hints ` is a recommended way for performance configuration, which is both device-agnostic and future-proof. -@sphinxdirective -* feature support by device +* feature support by device -When multiple models are to be used simultaneously, consider running inference on separate devices for each of them. Finally, when multiple models are executed in parallel on a device, using additional ``ov::hint::model_priority`` may help to define relative priorities of the models. Refer to the documentation on the :ref:`OpenVINO feature support for devices ` to check if your device supports the feature. - -@endsphinxdirective +When multiple models are to be used simultaneously, consider running inference on separate devices for each of them. Finally, when multiple models are executed in parallel on a device, using additional ``ov::hint::model_priority`` may help to define relative priorities of the models. Refer to the documentation on the :ref:`OpenVINO feature support for devices ` to check if your device supports the feature. **First-Inference Latency and Model Load/Compile Time** @@ -38,8 +36,10 @@ In some cases, model loading and compilation contribute to the "end-to-end" late For example, when the model is used exactly once, or when it is unloaded and reloaded in a cycle, to free the memory for another inference due to on-device memory limitations. 
Such a "first-inference latency" scenario may pose an additional limitation on the model load\compilation time, as inference accelerators (other than the CPU) usually require a certain level of model compilation upon loading. -The [model caching](../OV_Runtime_UG/Model_caching_overview.md) option is a way to lessen the impact over multiple application runs. If model caching is not possible, for example, it may require write permissions for the application, the CPU offers the fastest model load time almost every time. +The :doc:`model caching ` option is a way to lessen the impact over multiple application runs. If model caching is not possible, for example, it may require write permissions for the application, the CPU offers the fastest model load time almost every time. -Another way of dealing with first-inference latency is using the [AUTO device selection inference mode](../OV_Runtime_UG/auto_device_selection.md). It starts inference on the CPU, while waiting for the actual accelerator to load the model. At that point, it shifts to the new device seamlessly. +Another way of dealing with first-inference latency is using the :doc:`AUTO device selection inference mode `. It starts inference on the CPU, while waiting for the actual accelerator to load the model. At that point, it shifts to the new device seamlessly. -Finally, note that any [throughput-oriented options](./dldt_deployment_optimization_tput.md) may significantly increase the model uptime. +Finally, note that any :doc:`throughput-oriented options ` may significantly increase the model uptime. + +@endsphinxdirective diff --git a/docs/optimization_guide/dldt_deployment_optimization_tput.md b/docs/optimization_guide/dldt_deployment_optimization_tput.md index a2de858ab2786c..27967e821a0be2 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_tput.md +++ b/docs/optimization_guide/dldt_deployment_optimization_tput.md @@ -1,50 +1,66 @@ # Optimizing for Throughput {#openvino_docs_deployment_optimization_guide_tput} -As described in the section on the [latency-specific considerations](./dldt_deployment_optimization_latency.md), one of the possible use cases is *delivering every single request at the minimal delay*. +@sphinxdirective + +As described in the section on the :doc:`latency-specific considerations `, one of the possible use cases is *delivering every single request at the minimal delay*. Throughput, on the other hand, is about inference scenarios in which potentially **large number of inference requests are served simultaneously to improve the device utilization**. The associated increase in latency is not linearly dependent on the number of requests executed in parallel. A trade-off between overall throughput and serial performance of individual requests can be achieved with the right performance configuration of OpenVINO. -## Basic and Advanced Ways of Leveraging Throughput +Basic and Advanced Ways of Leveraging Throughput +################################################ + There are two ways of leveraging throughput with individual devices: -* **Basic (high-level)** flow with [OpenVINO performance hints](../OV_Runtime_UG/performance_hints.md) which is inherently **portable and future-proof**. -* **Advanced (low-level)** approach of explicit **batching** and **streams**. For more details, see the [runtime inference optimizations](dldt_deployment_optimization_tput_advanced.md). + +* **Basic (high-level)** flow with :doc:`OpenVINO performance hints ` which is inherently **portable and future-proof**. 
+* **Advanced (low-level)** approach of explicit **batching** and **streams**. For more details, see the :doc:`runtime inference optimizations ` In both cases, the application should be designed to execute multiple inference requests in parallel, as described in the following section. -@anchor throughput_app_design -## Throughput-Oriented Application Design +.. _throughput_app_design: + +Throughput-Oriented Application Design +###################################### + In general, most throughput-oriented inference applications should: + * Expose substantial amounts of *input* parallelism (e.g. process multiple video- or audio- sources, text documents, etc). * Decompose the data flow into a collection of concurrent inference requests that are aggressively scheduled to be executed in parallel: - * Setup the configuration for the *device* (for example, as parameters of the `ov::Core::compile_model`) via either previously introduced [low-level explicit options](dldt_deployment_optimization_tput_advanced.md) or [OpenVINO performance hints](../OV_Runtime_UG/performance_hints.md) (**preferable**): -@sphinxdirective -.. tab:: C++ + * Setup the configuration for the *device* (for example, as parameters of the ``ov::Core::compile_model``) via either previously introduced :doc:`low-level explicit options ` or :doc:`OpenVINO performance hints ` (**preferable**): - .. doxygensnippet:: docs/snippets/ov_auto_batching.cpp - :language: cpp - :fragment: [compile_model] -.. tab:: Python + .. tab:: C++ - .. doxygensnippet:: docs/snippets/ov_auto_batching.py - :language: python - :fragment: [compile_model] + .. doxygensnippet:: docs/snippets/ov_auto_batching.cpp + :language: cpp + :fragment: [compile_model] -@endsphinxdirective + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_auto_batching.py + :language: python + :fragment: [compile_model] + + + * Query the ``ov::optimal_number_of_infer_requests`` from the ``ov::CompiledModel`` (resulted from a compilation of the model for the device) to create the number of the requests required to saturate the device. +* Use the Async API with callbacks, to avoid any dependency on the completion order of the requests and possible device starvation, as explained in the :doc:`common-optimizations section `. - * Query the `ov::optimal_number_of_infer_requests` from the `ov::CompiledModel` (resulted from a compilation of the model for the device) to create the number of the requests required to saturate the device. -* Use the Async API with callbacks, to avoid any dependency on the completion order of the requests and possible device starvation, as explained in the [common-optimizations section](@ref openvino_docs_deployment_optimization_guide_common). +Multi-Device Execution +###################### -## Multi-Device Execution -OpenVINO offers the automatic, scalable [multi-device inference mode](../OV_Runtime_UG/multi_device.md), which is a simple *application-transparent* way to improve throughput. There is no need to re-architecture existing applications for any explicit multi-device support: no explicit network loading to each device, no separate per-device queues, no additional logic to balance inference requests between devices, etc. For the application using it, multi-device is like any other device, as it manages all processes internally. +OpenVINO offers the automatic, scalable :doc:`multi-device inference mode `, which is a simple *application-transparent* way to improve throughput. 
There is no need to re-architecture existing applications for any explicit multi-device support: no explicit network loading to each device, no separate per-device queues, no additional logic to balance inference requests between devices, etc. For the application using it, multi-device is like any other device, as it manages all processes internally. Just like with other throughput-oriented scenarios, there are several major pre-requisites for optimal multi-device performance: -* Using the [Asynchronous API](@ref async_api) and [callbacks](../OV_Runtime_UG/ov_infer_request.md) in particular. -* Providing the multi-device (and hence the underlying devices) with enough data to crunch. As the inference requests are naturally independent data pieces, the multi-device performs load-balancing at the "requests" (outermost) level to minimize the scheduling overhead. + +* Using the :ref:`Asynchronous API ` and :doc:`callbacks ` in particular. +* Providing the multi-device (and hence the underlying devices) with enough data to crunch. As the inference requests are naturally independent data pieces, the multi-device performs load-balancing at the "requests" (outermost) level to minimize the scheduling overhead. Keep in mind that the resulting performance is usually a fraction of the "ideal" (plain sum) value, when the devices compete for certain resources such as the memory-bandwidth, which is shared between CPU and iGPU. -> **NOTE**: While the legacy approach of optimizing the parameters of each device separately works, the [OpenVINO performance hints](../OV_Runtime_UG/performance_hints.md) allow configuring all devices (that are part of the specific multi-device configuration) at once. +.. note:: + + While the legacy approach of optimizing the parameters of each device separately works, the :doc:`OpenVINO performance hints ` allow configuring all devices (that are part of the specific multi-device configuration) at once. + +@endsphinxdirective diff --git a/docs/optimization_guide/dldt_deployment_optimization_tput_advanced.md b/docs/optimization_guide/dldt_deployment_optimization_tput_advanced.md index 3465b3dd5e82f9..48a2f70b65a574 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_tput_advanced.md +++ b/docs/optimization_guide/dldt_deployment_optimization_tput_advanced.md @@ -1,42 +1,62 @@ # Using Advanced Throughput Options: Streams and Batching {#openvino_docs_deployment_optimization_guide_tput_advanced} -## OpenVINO Streams -As explained in the [common-optimizations section](@ref openvino_docs_deployment_optimization_guide_common), running multiple inference requests asynchronously is important for general application efficiency. +@sphinxdirective + +OpenVINO Streams +#################### + +As explained in the :doc:`common-optimizations section `, running multiple inference requests asynchronously is important for general application efficiency. Internally, every device implements a queue, which acts as a buffer, storing the inference requests until retrieved by the device at its own pace. The devices may actually process multiple inference requests in parallel in order to improve the device utilization and overall throughput. This configurable method of this device-side parallelism is commonly referred as **streams**. 
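To make the interplay of streams and queued requests concrete, here is a minimal sketch with the Python API; the model path, input shape, and stream count are placeholders, while the property names are the string forms of the properties discussed in this section:

.. code-block:: py

   import numpy as np
   from openvino.runtime import AsyncInferQueue, Core

   core = Core()
   model = core.read_model("model.xml")  # placeholder path

   # Request several CPU streams explicitly; "AUTO" would let the runtime decide.
   compiled = core.compile_model(model, "CPU", {"NUM_STREAMS": "4"})

   # Enough jobs to keep every stream busy; requests are muxed to vacant streams.
   n_jobs = compiled.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
   queue = AsyncInferQueue(compiled, n_jobs)
   queue.set_callback(lambda request, userdata: print("job", userdata, "finished"))

   dummy = np.zeros((1, 3, 224, 224), dtype=np.float32)  # placeholder input
   for i in range(16):
       queue.start_async({0: dummy}, userdata=i)
   queue.wait_all()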
-> **NOTE**: Be aware that streams are **really executing the requests in parallel, but not in the lock step** (as the batching does), which makes the streams fully compatible with [dynamically-shaped inputs](../OV_Runtime_UG/ov_dynamic_shapes.md), while individual requests can have different shapes. +.. note:: + + Be aware that streams are **really executing the requests in parallel, but not in the lock step** (as the batching does), which makes the streams fully compatible with :doc:`dynamically-shaped inputs `, while individual requests can have different shapes. + +.. note:: -> **NOTE**: Most OpenVINO devices (including CPU and GPU) support the streams, yet the *optimal* number of the streams is deduced very differently. More information on this topic can be found in the section [below](@ref stream_considerations). + Most OpenVINO devices (including CPU and GPU) support the streams, yet the *optimal* number of the streams is deduced very differently. More information on this topic can be found in the section `below <#number-of-streams-considerations>`__. A few general considerations: + * Using the streams does increase the latency of an individual request: - * When the number of streams is not specified, a device creates a bare minimum of streams (usually, just one), as the latency-oriented case is default. - * See further tips for the optimal number of the streams [below](@ref throughput_advanced). + + * When the number of streams is not specified, a device creates a bare minimum of streams (usually, just one), as the latency-oriented case is default. + * See further tips for the optimal number of the streams `below <#choosing-the-number-of-streams-and-or-batch-size>`__. + * Streams are memory-intensive, as every stream duplicates the intermediate buffers to do inference in parallel to the rest of the streams: - * Always prefer streams over creating multiple `ov:Compiled_Model` instances for the same model, as weights memory is shared across streams, reducing the memory consumption. + + * Always prefer streams over creating multiple ``ov:Compiled_Model`` instances for the same model, as weights memory is shared across streams, reducing the memory consumption. + * Keep in mind that the streams also inflate the model load (compilation) time. For efficient asynchronous execution, the streams are actually handling the inference with a special pool of the threads (a thread per stream). -Each time you start inference requests (potentially from different application threads), they are actually muxed into an inference queue of the particular `ov:Compiled_Model`. +Each time you start inference requests (potentially from different application threads), they are actually muxed into an inference queue of the particular ``ov:Compiled_Model``. If there is a vacant stream, it pulls the request from the queue and actually expedites that to the on-device execution. -There are further device-specific details, like for the CPU, in the [internals](dldt_deployment_optimization_internals.md) section. +There are further device-specific details, like for the CPU, in the :doc:`internals ` section. + +Batching +#################### -## Batching Hardware accelerators such as GPUs are optimized for a massive compute parallelism, so the batching helps to saturate the device and leads to higher throughput. 
-While the streams (described in previous section) already help to hide the communication overheads and certain bubbles in the scheduling, running multiple OpenCL kernels simultaneously is less GPU-efficient compared to calling a kernel on the multiple inputs at once. +While the streams (described in previous section) already help to hide the communication overheads and certain bubbles in the scheduling, running multiple OpenCL kernels simultaneously is less GPU-efficient compared to calling a kernel on the multiple inputs at once. As explained in the next section, the batching is a must to leverage maximum throughput on the GPU. There are several primary methods of using the batching to help application performance: + * Collecting the inputs explicitly on the application side and then **sending the batch requests to OpenVINO**: - * Although this gives flexibility with the possible batching strategies, the approach requires redesigning the application logic. -* **Sending individual requests**, while configuring OpenVINO to collect and perform inference on the requests in batch [automatically](../OV_Runtime_UG/automatic_batching.md). + + * Although this gives flexibility with the possible batching strategies, the approach requires redesigning the application logic. + +* **Sending individual requests**, while configuring OpenVINO to collect and perform inference on the requests in batch :doc:`automatically `. In both cases, the optimal batch size is very device-specific. As explained below, the optimal batch size also depends on the model, inference precision and other factors. -@anchor throughput_advanced -## Choosing the Number of Streams and/or Batch Size + +Choosing the Number of Streams and/or Batch Size +################################################ + Predicting the inference performance is difficult and finding optimal execution parameters requires direct experiments with measurements. Run performance testing in the scope of development, and make sure to validate overall (*end-to-end*) application performance. @@ -46,33 +66,54 @@ In some cases, combination of streams and batching may be required to maximize t One possible throughput optimization strategy is to **set an upper bound for latency and then increase the batch size and/or number of the streams until that tail latency is met (or the throughput is not growing anymore)**. -> **NOTE**: When playing with [dynamically-shaped inputs](../OV_Runtime_UG/ov_dynamic_shapes.md), use only the streams (no batching), as they tolerate individual requests having different shapes. +.. note:: + + When playing with :doc:`dynamically-shaped inputs `, use only the streams (no batching), as they tolerate individual requests having different shapes. -> **NOTE**: Using the [High-Level Performance Hints](../OV_Runtime_UG/performance_hints.md) is the alternative, portable and future-proof option, allowing OpenVINO to find the best combination of streams and batching for a given scenario and a model. +.. note:: + + Using the :doc:`High-Level Performance Hints ` is the alternative, portable and future-proof option, allowing OpenVINO to find the best combination of streams and batching for a given scenario and a model. + +Number of Streams Considerations +++++++++++++++++++++++++++++++++ -@anchor stream_considerations -### Number of Streams Considerations * Select the number of streams that is **less or equal** to the number of requests that the application would be able to run simultaneously. 
* To avoid wasting resources, the number of streams should be enough to meet the *average* parallel slack rather than the peak load. -* Use the `ov::streams::AUTO` as a more portable option (that also respects the underlying hardware configuration). +* Use the `ov::streams::AUTO `__ as a more portable option (that also respects the underlying hardware configuration). * It is very important to keep these streams busy, by running as many inference requests as possible (for example, start the newly-arrived inputs immediately): - * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests` of the `ov:Compiled_Model`. -* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams`. -### Batch Size Considerations + * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests `__ of the ``ov:Compiled_Model``. + +* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams `__. + +Batch Size Considerations ++++++++++++++++++++++++++ + * Select the batch size that is **equal** to the number of requests that your application is able to run simultaneously: - * Otherwise (or if the number of "available" requests fluctuates), you may need to keep several instances of the network (reshaped to the different batch size) and select the properly sized instance in the runtime accordingly. -* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size` is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. + * Otherwise (or if the number of "available" requests fluctuates), you may need to keep several instances of the network (reshaped to the different batch size) and select the properly sized instance in the runtime accordingly. + +* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size `__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. + + +A Few Device-specific Details ++++++++++++++++++++++++++++++ -### A Few Device-specific Details * For the **GPU**: - * When the parallel slack is small, for example, only 2-4 requests executed simultaneously, then using only the streams for the GPU may suffice: - * The GPU runs 2 requests per stream, so 4 requests can be served by 2 streams. - * Alternatively, consider a single stream with 2 requests (each with a small batch size like 2), which would total the same 4 inputs in flight. - * Typically, for 4 and more requests the batching delivers better throughput. - * A batch size can be calculated as "a number of inference requests executed in parallel" divided by the "number of requests that the streams consume": - * For example, if you process 16 cameras (by 16 requests inferenced *simultaneously*) by 2 GPU streams (each can process two requests), the batch size per request is 16/(2*2)=4. + + * When the parallel slack is small, for example, only 2-4 requests executed simultaneously, then using only the streams for the GPU may suffice: + + * The GPU runs 2 requests per stream, so 4 requests can be served by 2 streams. + * Alternatively, consider a single stream with 2 requests (each with a small batch size like 2), which would total the same 4 inputs in flight. + + * Typically, for 4 and more requests the batching delivers better throughput. 
+ * A batch size can be calculated as "a number of inference requests executed in parallel" divided by the "number of requests that the streams consume": + + * For example, if you process 16 cameras (by 16 requests inferenced *simultaneously*) by 2 GPU streams (each can process two requests), the batch size per request is 16/(2*2)=4. * For the **CPU, always use the streams first!**: - * On high-end CPUs, using moderate (2-8) batch size *in addition* to the maximum number of streams may further improve the performance. + + * On high-end CPUs, using moderate (2-8) batch size *in addition* to the maximum number of streams may further improve the performance. + +@endsphinxdirective + diff --git a/docs/optimization_guide/nncf/introduction.md b/docs/optimization_guide/nncf/introduction.md index ba2a2662ba3a17..a4fcbbead198b4 100644 --- a/docs/optimization_guide/nncf/introduction.md +++ b/docs/optimization_guide/nncf/introduction.md @@ -9,10 +9,11 @@ qat_introduction filter_pruning -@endsphinxdirective -## Introduction -Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than [post-training optimization](@ref pot_introduction), but it can require more effort to set up. +Introduction +#################### + +Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than :doc:`post-training optimization `, but it can require more effort to set up. OpenVINO provides the Neural Network Compression Framework (NNCF) tool for implementing compression algorithms on models to improve their performance. NNCF is a Python library that integrates into PyTorch and TensorFlow training pipelines to add training-time compression methods to the pipeline. To apply training-time compression methods with NNCF, you need: @@ -22,65 +23,93 @@ OpenVINO provides the Neural Network Compression Framework (NNCF) tool for imple Adding compression to a training pipeline only requires a few lines of code. The compression techniques are defined through a single configuration file that specifies which algorithms to use during fine-tuning. -### NNCF Quick Start Examples +NNCF Quick Start Examples ++++++++++++++++++++++++++ + See the following Jupyter Notebooks for step-by-step examples showing how to add model compression to a PyTorch or Tensorflow training pipeline with NNCF: -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html). -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html). +- `Quantization Aware Training with NNCF and PyTorch `__. +- `Quantization Aware Training with NNCF and TensorFlow `__. + +Installation +#################### + +NNCF is open-sourced on `GitHub `__ and distributed as a separate package from OpenVINO. 
It is also available on PyPI. Install it to the same Python environment where PyTorch or TensorFlow is installed. -## Installation -NNCF is open-sourced on [GitHub](https://github.com/openvinotoolkit/nncf) and distributed as a separate package from OpenVINO. It is also available on PyPI. Install it to the same Python environment where PyTorch or TensorFlow is installed. +Install from PyPI +++++++++++++++++++++ -### Install from PyPI To install the latest released version via pip manager run the following command: -``` -pip install nncf -``` -> **NOTE**: To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. +.. code-block:: sh + + pip install nncf + + +.. note:: + + To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. + -To install the latest NNCF version from source follow the instruction on [GitHub](https://github.com/openvinotoolkit/nncf#installation). +To install the latest NNCF version from source follow the instruction on `GitHub `__. -> **NOTE**: NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. +.. note:: + + NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. + +Working with NNCF +#################### -## Working with NNCF The figure below shows a common workflow of applying training-time compressions with NNCF. The NNCF optimizations are added to the TensorFlow or PyTorch training script, and then the model undergoes fine-tuning. The optimized model can then be exported to OpenVINO IR format for accelerated performance with OpenVINO Runtime. -![](../../img/nncf_workflow.svg) +.. image:: _static/images/nncf_workflow.svg + +Training-Time Compression Methods ++++++++++++++++++++++++++++++++++ -### Training-Time Compression Methods -NNCF provides several methods for improving model performance with training-time compression. +NNCF provides several methods for improving model performance with training-time compression. -#### Quantization -Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. +Quantization +-------------------- +Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. Quantization-aware training inserts nodes into the neural network during training that simulate the effect of lower precision. This allows the training algorithm to consider quantization errors as part of the overall training loss that gets minimized during training. The network is then able to achieve enhanced accuracy when quantized. -The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the [Quantization-ware Training guide](@ref qat_introduction) to learn more. 
+The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the :doc:`Quantization-aware Training guide ` to learn more. + +Filter pruning +-------------------- + +Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criterion is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the :doc:`Filter Pruning ` page. -#### Filter pruning -Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criteria is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the [Filter Pruning](@ref filter_pruning) page. +Experimental methods +-------------------- + -#### Experimental methods NNCF also provides state-of-the-art compression techniques that are still in experimental stages of development and are only recommended for expert developers. These include: - Mixed-precision quantization - Sparsity - Binarization -To learn more about these methods, visit the [NNCF repository on GitHub](https://github.com/openvinotoolkit/nncf). +To learn more about these methods, visit the `NNCF repository on GitHub `__. + +Recommended Workflow +++++++++++++++++++++ -### Recommended Workflow Using compression-aware training requires a training pipeline, an annotated dataset, and compute resources (such as CPUs or GPUs). If you don't already have these set up and available, it can be easier to start post-training quantization to quickly see quantized results. Then you can use compression-aware training if the model isn't accurate enough. We recommend the following workflow for compressing models with NNCF: -1. [Perform post-training quantization](@ref pot_introduction) on your model and then compare performance to the original model. -2. If the accuracy is too degraded, use [Quantization-aware Training](@ref qat_introduction) to increase accuracy while still achieving faster inference time. -3. If the quantized model is still too slow, use [Filter Pruning](@ref filter_pruning) to further improve the model’s inference speed. +1. :doc:`Perform post-training quantization ` on your model and then compare performance to the original model. +2. If the accuracy is too degraded, use :doc:`Quantization-aware Training ` to increase accuracy while still achieving faster inference time. +3. If the quantized model is still too slow, use :doc:`Filter Pruning ` to further improve the model’s inference speed.
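To connect this workflow to deployment, the sketch below (illustrative only, not part of this patch) loads a model exported to OpenVINO IR after NNCF fine-tuning and runs it through OpenVINO Runtime; the ``model_int8.xml`` file name and the ``CPU`` target are placeholder assumptions.

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;

       // IR produced by exporting the NNCF-optimized (for example, quantized) model.
       auto model = core.read_model("model_int8.xml");

       // The compressed model goes through the same compile/infer API as the original one.
       ov::CompiledModel compiled = core.compile_model(model, "CPU");
       ov::InferRequest request = compiled.create_infer_request();

       // ... fill the input tensors as usual, then:
       request.infer();
       return 0;
   }

No NNCF-specific component is required at this stage, which is consistent with the note above: NNCF does not pull in OpenVINO as an installation requirement, and OpenVINO is installed separately for deployment.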
+ +Additional Resources +#################### -## Additional Resources -- [Quantizing Models Post-training](@ref pot_introduction) -- [NNCF GitHub repository](https://github.com/openvinotoolkit/nncf) -- [NNCF FAQ](https://github.com/openvinotoolkit/nncf/blob/develop/docs/FAQ.md) -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html) -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html) \ No newline at end of file +- :doc:`Quantizing Models Post-training ` +- `NNCF GitHub repository `__ +- `NNCF FAQ `__ +- `Quantization Aware Training with NNCF and PyTorch `__ +- `Quantization Aware Training with NNCF and TensorFlow `__ + +@endsphinxdirective diff --git a/docs/resources/tensorflow_frontend.md b/docs/resources/tensorflow_frontend.md index 15926715f5f010..47f9cce8ca2656 100644 --- a/docs/resources/tensorflow_frontend.md +++ b/docs/resources/tensorflow_frontend.md @@ -1,8 +1,10 @@ # OpenVINO TensorFlow Frontend Capabilities and Limitations {#openvino_docs_MO_DG_TensorFlow_Frontend} +@sphinxdirective + TensorFlow Frontend is C++ based Frontend for conversion of TensorFlow models and is available as a preview feature starting from 2022.3. -That means that you can start experimenting with `--use_new_frontend` option passed to Model Optimizer to enjoy improved conversion time for limited scope of models -or directly loading TensorFlow models through `read_model()` method. +That means that you can start experimenting with ``--use_new_frontend`` option passed to Model Optimizer to enjoy improved conversion time for limited scope of models +or directly loading TensorFlow models through ``read_model()`` method. The current limitations: @@ -10,4 +12,6 @@ The current limitations: * There is no full parity yet between legacy Model Optimizer TensorFlow Frontend and new TensorFlow Frontend so primary path for model conversion is still legacy frontend * Model coverage and performance is continuously improving so some conversion phase failures, performance and accuracy issues might occur in case model is not yet covered. Known unsupported models: object detection models and all models with transformation configs, models with TF1/TF2 control flow, Complex type and training parts -* `read_model()` method supports only `*.pb` format while Model Optimizer (or `convert_model` call) will accept other formats as well which are accepted by existing legacy frontend +* ``read_model()`` method supports only ``*.pb`` format while Model Optimizer (or ``convert_model`` call) will accept other formats as well which are accepted by existing legacy frontend + +@endsphinxdirective diff --git a/docs/snippets/InferenceEngine_network_with_state_infer.cpp b/docs/snippets/InferenceEngine_network_with_state_infer.cpp deleted file mode 100644 index 7af9c076931169..00000000000000 --- a/docs/snippets/InferenceEngine_network_with_state_infer.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -using namespace InferenceEngine; - -int main(int argc, char *argv[]) { - try { - // --------------------------- 1. Load inference engine ------------------------------------- - std::cout << "Loading Inference Engine" << std::endl; - Core ie; - - // 2. 
Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format - std::cout << "Loading network files" << std::endl; - CNNNetwork network; - network = ie.ReadNetwork(std::string("c:\\work\\git\\github_dldt3\\openvino\\model-optimizer\\summator.xml")); - network.setBatchSize(1); - - // 3. Load network to CPU - ExecutableNetwork executableNet = ie.LoadNetwork(network, "CPU"); - // 4. Create Infer Request - InferRequest inferRequest = executableNet.CreateInferRequest(); - - // 5. Prepare inputs - ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo(); - std::vector ptrInputBlobs; - for (const auto& input : cInputInfo) { - ptrInputBlobs.push_back(inferRequest.GetBlob(input.first)); - } - InputsDataMap inputInfo; - inputInfo = network.getInputsInfo(); - for (auto &item : inputInfo) { - Precision inputPrecision = Precision::FP32; - item.second->setPrecision(inputPrecision); - } - - // 6. Prepare outputs - std::vector ptrOutputBlobs; - ConstOutputsDataMap cOutputInfo = executableNet.GetOutputsInfo(); - for (const auto& output : cOutputInfo) { - ptrOutputBlobs.push_back(inferRequest.GetBlob(output.first)); - } - - // 7. Initialize memory state before starting - for (auto &&state : inferRequest.QueryState()) { - state.Reset(); - } - - //! [part1] - // input data - std::vector data = { 1,2,3,4,5,6}; - // infer the first utterance - for (size_t next_input = 0; next_input < data.size()/2; next_input++) { - MemoryBlob::Ptr minput = as(ptrInputBlobs[0]); - auto minputHolder = minput->wmap(); - - std::memcpy(minputHolder.as(), - &data[next_input], - sizeof(float)); - - inferRequest.Infer(); - // check states - auto states = inferRequest.QueryState(); - if (states.empty()) { - throw std::runtime_error("Queried states are empty"); - } - auto mstate = as(states[0].GetState()); - if (mstate == nullptr) { - throw std::runtime_error("Can't cast state to MemoryBlob"); - } - auto state_buf = mstate->rmap(); - float * state =state_buf.as(); - std::cout << state[0] << "\n"; - } - - // resetting state between utterances - std::cout<<"Reset state\n"; - for (auto &&state : inferRequest.QueryState()) { - state.Reset(); - } - - // infer the second utterance - for (size_t next_input = data.size()/2; next_input < data.size(); next_input++) { - MemoryBlob::Ptr minput = as(ptrInputBlobs[0]); - auto minputHolder = minput->wmap(); - - std::memcpy(minputHolder.as(), - &data[next_input], - sizeof(float)); - - inferRequest.Infer(); - // check states - auto states = inferRequest.QueryState(); - auto mstate = as(states[0].GetState()); - auto state_buf = mstate->rmap(); - float * state =state_buf.as(); - std::cout << state[0] << "\n"; - } - //! [part1] - } - catch (const std::exception &error) { - std::cerr << error.what() << std::endl; - return 1; - } - catch (...) 
{ - std::cerr << "Unknown/internal exception happened" << std::endl; - return 1; - } - - std::cerr << "Execution successful" << std::endl; - return 0; -} diff --git a/docs/snippets/gpu/context_sharing_va_c.cpp b/docs/snippets/gpu/context_sharing_va_c.cpp new file mode 100644 index 00000000000000..dd22e330bb5dcf --- /dev/null +++ b/docs/snippets/gpu/context_sharing_va_c.cpp @@ -0,0 +1,156 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef ENABLE_LIBVA +#include +#include +#include + +VADisplay get_va_display(); +VASurfaceID decode_va_surface(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_remote_context_t* shared_va_context = NULL; + ov_tensor_t* remote_tensor = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* new_model = NULL; + + ov_output_const_port_t* input_port = NULL; + char* in_tensor_name = NULL; + char* out_tensor_name = NULL; + ov_shape_t* input_shape = NULL; + ov_element_type_e input_type; + + const int height = 480; + const int width = 640; + + // initialize the objects + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + // ... + + //! [context_sharing_va] + + // ... + + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &new_model); + + VADisplay display = get_va_display(); + // create the shared context object + ov_core_create_context(core, + "GPU", + 4, + &shared_va_context, + ov_property_key_intel_gpu_context_type, + "VA_SHARED", + ov_property_key_intel_gpu_va_device, + display); + + // compile model within a shared context + ov_core_compile_model_with_context(core, new_model, shared_va_context, 0, &compiled_model); + + ov_output_const_port_t* port_0 = NULL; + char* input_name_0 = NULL; + ov_model_const_input_by_index(new_model, 0, &port_0); + ov_port_get_any_name(port_0, &input_name_0); + + ov_output_const_port_t* port_1 = NULL; + char* input_name_1 = NULL; + ov_model_const_input_by_index(new_model, 1, &port_1); + 
ov_port_get_any_name(port_1, &input_name_1); + + ov_shape_t shape_y = {0, NULL}; + ov_shape_t shape_uv = {0, NULL}; + ov_const_port_get_shape(port_0, &shape_y); + ov_const_port_get_shape(port_1, &shape_uv); + + // execute decoding and obtain decoded surface handle + VASurfaceID va_surface = decode_va_surface(); + // ... + //wrap decoder output into RemoteBlobs and set it as inference input + + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_y, + 6, + &remote_tensor_y, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 0); + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_uv, + 6, + &remote_tensor_uv, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 1); + + ov_compiled_model_create_infer_request(compiled_model, &infer_request); + ov_infer_request_set_tensor(infer_request, input_name_0, remote_tensor_y); + ov_infer_request_set_tensor(infer_request, input_name_1, remote_tensor_uv); + ov_infer_request_infer(infer_request); + //! [context_sharing_va] + + // deinitialization + ov_free(input_name_0); + ov_free(input_name_1); + ov_output_const_port_free(port_0); + ov_output_const_port_free(port_1); + ov_layout_free(layout); + ov_preprocess_input_model_info_free(preprocess_input_model_info); + ov_preprocess_preprocess_steps_free(preprocess_input_steps); + ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info); + ov_preprocess_input_info_free(preprocess_input_info); + ov_model_free(new_model); + ov_preprocess_prepostprocessor_free(preprocess); + ov_tensor_free(remote_tensor_y); + ov_tensor_free(remote_tensor_uv); + ov_shape_free(&shape_y); + ov_shape_free(&shape_uv); + ov_infer_request_free(infer_request); + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_model_free(new_model); + ov_remote_context_free(shared_va_context); + ov_core_free(core); + + return 0; +} +#endif // ENABLE_LIBVA diff --git a/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp new file mode 100644 index 00000000000000..826af8ddffe32b --- /dev/null +++ b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +cl::Image2D get_y_image(); +cl::Image2D get_uv_image(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* model_with_preproc = NULL; + ov_remote_context_t* gpu_context = NULL; + char* input_name0 = NULL; + char* input_name1 = NULL; + ov_output_const_port_t* input_port0 = NULL; + ov_output_const_port_t* input_port1 = NULL; + size_t height = 480; + size_t width = 640; + + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + //! 
[init_preproc] + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, ov_element_type_e::U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + ov_color_format_e::NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, ov_color_format_e::BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &model_with_preproc); + //! [init_preproc] + + ov_core_compile_model(core, model_with_preproc, "GPU", 0, &compiled_model); + ov_compiled_model_get_context(compiled_model, &gpu_context); + ov_compiled_model_create_infer_request(compiled_model, &infer_request); + + { + //! [single_batch] + ov_model_const_input_by_index(model, 0, &input_port0); + ov_model_const_input_by_index(model, 1, &input_port1); + ov_port_get_any_name(input_port0, &input_name0); + ov_port_get_any_name(input_port1, &input_name1); + + ov_shape_t shape_y, shape_uv; + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + ov_const_port_get_shape(input_port0, &shape_y); + ov_const_port_get_shape(input_port1, &shape_uv); + + cl::Image2D image_y = get_y_image(); + cl::Image2D image_uv = get_uv_image(); + ov_remote_context_create_tensor(gpu_context, + ov_element_type_e::U8, + shape_y, + 4, + &remote_tensor_y, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + image_y.get()); + + ov_remote_context_create_tensor(gpu_context, + ov_element_type_e::U8, + shape_uv, + 4, + &remote_tensor_uv, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + image_uv.get()); + + ov_infer_request_set_tensor(infer_request, input_name0, remote_tensor_y); + ov_infer_request_set_tensor(infer_request, input_name1, remote_tensor_uv); + ov_infer_request_infer(infer_request); + //!
[single_batch] + + ov_free(input_name0); + ov_free(input_name1); + ov_output_const_port_free(input_port0); + ov_output_const_port_free(input_port1); + + ov_layout_free(layout); + ov_preprocess_input_model_info_free(preprocess_input_model_info); + ov_preprocess_preprocess_steps_free(preprocess_input_steps); + ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info); + ov_preprocess_input_info_free(preprocess_input_info); + ov_preprocess_prepostprocessor_free(preprocess); + + ov_tensor_free(remote_tensor_y); + ov_tensor_free(remote_tensor_uv); + ov_shape_free(&shape_y); + ov_shape_free(&shape_uv); + + ov_infer_request_free(infer_request); + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_model_free(model_with_preproc); + ov_remote_context_free(gpu_context); + ov_core_free(core); + } + + return 0; +} diff --git a/docs/snippets/gpu/remote_objects_creation_c.cpp b/docs/snippets/gpu/remote_objects_creation_c.cpp new file mode 100644 index 00000000000000..c870da6bb06a26 --- /dev/null +++ b/docs/snippets/gpu/remote_objects_creation_c.cpp @@ -0,0 +1,283 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#ifdef WIN32 +typedef void* ID3D11Device; +#elif defined(ENABLE_LIBVA) +#include +#endif + +void* allocate_usm_buffer(size_t size); +cl_mem allocate_cl_mem(size_t size); +cl_context get_cl_context(); +cl_command_queue get_cl_queue(); +cl::Buffer allocate_buffer(size_t size); +cl::Image2D allocate_image(size_t size); + +#ifdef WIN32 +ID3D11Device* get_d3d_device(); +#elif defined(ENABLE_LIBVA) +VADisplay get_va_display(); +#endif + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_remote_context_t* gpu_context = NULL; + ov_tensor_t* remote_tensor = NULL; + + ov_output_const_port* input_port = NULL; + char* in_tensor_name = NULL; + char* out_tensor_name = NULL; + ov_shape_t input_shape; + ov_element_type_e input_type; + + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + ov_model_const_input(model, &input_port); + ov_port_get_any_name(input_port, &in_tensor_name); + ov_const_port_get_shape(input_port, &input_shape); + ov_port_get_element_type(input_port, &input_type); + size_t input_size = 1; + for (auto i = 0; i < input_shape.rank; i++) + input_size *= input_shape.dims[i]; + + ov_core_compile_model(core, model, "GPU", 0, &compiled_model); + ov_compiled_model_get_context(compiled_model, &gpu_context); + +{ + //! [wrap_usm_pointer] + void* shared_buffer = allocate_usm_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_usm_pointer] +} + +{ + //! [wrap_cl_mem] + cl_mem shared_buffer = allocate_cl_mem(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_cl_mem] +} + +{ + //! [wrap_cl_buffer] + cl::Buffer shared_buffer = allocate_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_buffer] +} + +{ + //! 
[wrap_cl_image] + cl::Image2D shared_buffer = allocate_image(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_image] +} + +{ + //! [allocate_usm_device] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_device] +} + +{ + //! [allocate_usm_host] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_HOST_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_host] +} + +{ + int64_t width = 1024; + int64_t height = 768; + + int64_t y_plane_size = width * height; + int64_t uv_plane_size = width * height / 2; + + ov_shape_t shape_y = {0, NULL}; + int64_t dims_y[4] = {1, 1, height, width}; + ov_shape_t shape_uv = {0, NULL}; + int64_t dims_uv[4] = {1, 2, height / 2, width / 2}; + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + + ov_shape_create(4, dims_y, &shape_y); + ov_shape_create(4, dims_uv, &shape_uv); + + //! [create_nv12_surface] + cl::Image2D y_plane_surface = allocate_image(y_plane_size); + cl::Image2D uv_plane_surface = allocate_image(uv_plane_size); + + ov_remote_context_create_tensor(gpu_context, + input_type, + shape_y, + 4, + &remote_tensor_y, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + y_plane_surface.get()); + + ov_remote_context_create_tensor(gpu_context, + input_type, + shape_uv, + 4, + &remote_tensor_uv, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + uv_plane_surface.get()); + + ov_tensor_free(remote_tensor_y); + ov_tensor_free(remote_tensor_uv); + ov_shape_free(&shape_y); + ov_shape_free(&shape_uv); + //! [create_nv12_surface] +} + +{ + //! [context_from_cl_context] + cl_context cl_context = get_cl_context(); + ov_core_create_context(core, + "GPU", + 4, + &gpu_context, + ov_property_key_intel_gpu_context_type, + "OCL", + ov_property_key_intel_gpu_ocl_context, + cl_context); + //! [context_from_cl_context] +} + +{ + //! [context_from_cl_queue] + cl_command_queue cl_queue = get_cl_queue(); + cl_context cl_context = get_cl_context(); + ov_core_create_context(core, + "GPU", + 6, + &gpu_context, + ov_property_key_intel_gpu_context_type, + "OCL", + ov_property_key_intel_gpu_ocl_context, + cl_context, + ov_property_key_intel_gpu_ocl_queue, + cl_queue); + //! [context_from_cl_queue] +} + +#ifdef WIN32 +{ + //! [context_from_d3d_device] + ID3D11Device* device = get_d3d_device(); + ov_core_create_context(core, + "GPU", + 4, + &gpu_context, + ov_property_key_intel_gpu_context_type, + "VA_SHARED", + ov_property_key_intel_gpu_va_device, + device); + //! [context_from_d3d_device] +} +#elif defined(ENABLE_LIBVA) +{ + //! [context_from_va_display] + VADisplay display = get_va_display(); + ov_core_create_context(core, + "GPU", + 4, + &gpu_context, + ov_property_key_intel_gpu_context_type, + "VA_SHARED", + ov_property_key_intel_gpu_va_device, + display); + //! [context_from_va_display] +} +#endif +{ + //! 
[default_context_from_core] + ov_core_get_default_context(core, "GPU", &gpu_context); + // Extract ocl context handle from RemoteContext + size_t size = 0; + char* params = nullptr; + // params is format like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0" + // You need parse it. + ov_remote_context_get_params(gpu_context, &size, ¶ms); + //! [default_context_from_core] +} + +{ + //! [default_context_from_model] + ov_compiled_model_get_context(compiled_model, &gpu_context); + // Extract ocl context handle from RemoteContext + size_t size = 0; + char* params = nullptr; + // params is format like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0" + // You need parse it. + ov_remote_context_get_params(gpu_context, &size, ¶ms); + //! [default_context_from_model] +} + +ov_compiled_model_free(compiled_model); +ov_model_free(model); +ov_remote_context_free(gpu_context); +ov_core_free(core); + +return 0; +} diff --git a/docs/snippets/ov_model_with_state_infer.cpp b/docs/snippets/ov_model_with_state_infer.cpp new file mode 100644 index 00000000000000..72182546367556 --- /dev/null +++ b/docs/snippets/ov_model_with_state_infer.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/op/util/variable.hpp" +#include "openvino/openvino.hpp" +#include "openvino/opsets/opset11.hpp" + +int main(int argc, char* argv[]) { + try { + // --------------------------- 1. Load inference engine ------------------------------------- + std::cout << "Loading OpenVINO" << std::endl; + ov::Core core; + + //! [model_create] + auto arg = std::make_shared(ov::element::f32, ov::Shape{1, 1}); + auto init_const = ov::opset11::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); + + // The ReadValue/Assign operations must be used in pairs in the model. + // For each such a pair, its own variable object must be created. + const std::string variable_name("variable0"); + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_name}); + + // Creating ov::Model + auto read = std::make_shared(init_const, variable); + std::vector> args = {arg, read}; + auto add = std::make_shared(arg, read); + auto assign = std::make_shared(add, variable); + auto add2 = std::make_shared(add, read); + auto res = std::make_shared(add2); + + auto model = + std::make_shared(ov::ResultVector({res}), ov::SinkVector({assign}), ov::ParameterVector({arg})); + //! [model_create] + + // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format + std::cout << "Loading network files" << std::endl; + + // 3. Load network to CPU + ov::CompiledModel compiled_model = core.compile_model(model, "CPU"); + // 4. Create Infer Request + ov::InferRequest infer_request = compiled_model.create_infer_request(); + + // 5. Prepare inputs + + std::vector input_tensors; + for (const auto& input : compiled_model.inputs()) { + input_tensors.emplace_back(infer_request.get_tensor(input)); + } + + // 6. Prepare outputs + std::vector output_tensors; + for (const auto& output : compiled_model.outputs()) { + output_tensors.emplace_back(infer_request.get_tensor(output)); + } + + // 7. Initialize memory state before starting + for (auto&& state : infer_request.query_state()) { + state.reset(); + } + + //! 
[part1] + // input data + std::vector data = {1, 2, 3, 4, 5, 6}; + // infer the first utterance + for (size_t next_input = 0; next_input < data.size() / 2; next_input++) { + auto minput = input_tensors[0]; + + std::memcpy(minput.data(), &data[next_input], sizeof(float)); + + infer_request.infer(); + // check states + auto states = infer_request.query_state(); + if (states.empty()) { + throw std::runtime_error("Queried states are empty"); + } + auto mstate = states[0].get_state(); + if (!mstate) { + throw std::runtime_error("Can't cast state to MemoryBlob"); + } + float* state = mstate.data(); + std::cout << state[0] << "\n"; + } + + // resetting state between utterances + std::cout << "Reset state\n"; + for (auto&& state : infer_request.query_state()) { + state.reset(); + } + + // infer the second utterance + for (size_t next_input = data.size() / 2; next_input < data.size(); next_input++) { + auto minput = input_tensors[0]; + + std::memcpy(minput.data(), &data[next_input], sizeof(float)); + + infer_request.infer(); + // check states + auto states = infer_request.query_state(); + auto mstate = states[0].get_state(); + float* state = mstate.data(); + std::cout << state[0] << "\n"; + } + //! [part1] + } catch (const std::exception& error) { + std::cerr << error.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Unknown/internal exception happened" << std::endl; + return 1; + } + + std::cerr << "Execution successful" << std::endl; + return 0; +} diff --git a/docs/tutorials.md b/docs/tutorials.md index 94d972729b6cd8..b7f69cb75e48d7 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -227,11 +227,8 @@ Demos that demonstrate inference on a particular model. +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ | `234-encodec-audio-compression `__ | Audio compression with EnCodec and OpenVINO™ | |n234-img1| | +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ - - - - - + | `235-controlnet-stable-diffusion `__ | A Text-to-Image Generation with ControlNet Conditioning and OpenVINO™ | |n235-img1| | + +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ .. raw:: html @@ -445,6 +442,8 @@ Made with `contributors-img `__. :target: https://user-images.githubusercontent.com/29454499/221933762-4ff32ecb-5e5d-4484-80e1-e9396cb3c511.png .. |n234-img1| image:: https://github.com/facebookresearch/encodec/raw/main/thumbnail.png :target: https://github.com/facebookresearch/encodec/raw/main/thumbnail.png +.. |n235-img1| image:: https://user-images.githubusercontent.com/29454499/224541412-9d13443e-0e42-43f2-8210-aa31820c5b44.png + :target: https://user-images.githubusercontent.com/29454499/224541412-9d13443e-0e42-43f2-8210-aa31820c5b44.png .. 
|n301-img1| image:: https://user-images.githubusercontent.com/15709723/127779607-8fa34947-1c35-4260-8d04-981c41a2a2cc.png :target: https://user-images.githubusercontent.com/15709723/127779607-8fa34947-1c35-4260-8d04-981c41a2a2cc.png .. |n401-img1| image:: https://user-images.githubusercontent.com/4547501/141471665-82b28c86-cf64-4bfe-98b3-c314658f2d96.gif diff --git a/samples/c/hello_classification/main.c b/samples/c/hello_classification/main.c index b3748356ad2fd0..4fbf0c5f7a99b6 100644 --- a/samples/c/hello_classification/main.c +++ b/samples/c/hello_classification/main.c @@ -72,6 +72,7 @@ struct infer_result* tensor_to_infer_result(ov_tensor_t* tensor, size_t* result_ results[i].probability = float_data[i]; } + ov_shape_free(&output_shape); return results; } diff --git a/samples/cpp/benchmark_app/inputs_filling.cpp b/samples/cpp/benchmark_app/inputs_filling.cpp index 1031d2cb434b66..c0dcc755c9b0f9 100644 --- a/samples/cpp/benchmark_app/inputs_filling.cpp +++ b/samples/cpp/benchmark_app/inputs_filling.cpp @@ -563,7 +563,7 @@ ov::Tensor get_random_tensor(const std::pair(inputInfo.second); } else if (type == ov::element::f16) { - return create_tensor_random(inputInfo.second); + return create_tensor_random(inputInfo.second); } else if (type == ov::element::i32) { return create_tensor_random(inputInfo.second); } else if (type == ov::element::i64) { diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index bfa2b208d4e2bb..9706012a827b44 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -356,11 +356,10 @@ int main(int argc, char* argv[]) { bool perf_counts = false; // check if using the virtual device - auto if_auto = std::find(devices.begin(), devices.end(), "AUTO") != devices.end(); - auto if_multi = std::find(devices.begin(), devices.end(), "MULTI") != devices.end(); + auto is_virtual = is_virtual_device_found(devices); auto hardware_devices = devices; - // Remove the hardware devices if AUTO/MULTI appears in the devices list. - if (if_auto || if_multi) { + // Remove the hardware devices if AUTO/MULTI/HETERO appears in the devices list. + if (is_virtual) { devices.clear(); // Parse out the currect virtual device as the target device. 
std::string virtual_device = split(device_name, ':').at(0); @@ -376,8 +375,11 @@ int main(int argc, char* argv[]) { auto& device_config = config[device]; // high-level performance modes - auto ov_perf_hint = get_performance_hint(device, core); - device_config.emplace(ov::hint::performance_mode(ov_perf_hint)); + if (!device_config.count(ov::hint::performance_mode.name())) { + device_config.emplace(ov::hint::performance_mode(get_performance_hint(device, core))); + } + auto ov_perf_hint = device_config.at(ov::hint::performance_mode.name()).as(); + if (FLAGS_nireq != 0) device_config.emplace(ov::hint::num_requests(unsigned(FLAGS_nireq))); @@ -415,7 +417,7 @@ int main(int argc, char* argv[]) { std::end(supported_properties); }; // the rest are individual per-device settings (overriding the values set with perf modes) - auto setThroughputStreams = [&]() { + auto set_throughput_streams = [&]() { std::string key = getDeviceTypeFromName(device) + "_THROUGHPUT_STREAMS"; auto it_device_nstreams = device_nstreams.find(device); if (it_device_nstreams != device_nstreams.end()) { @@ -426,34 +428,13 @@ int main(int argc, char* argv[]) { // Use API 2.0 key for streams key = ov::num_streams.name(); device_config[key] = it_device_nstreams->second; - } else if (device == "MULTI" || device == "AUTO") { - // check if the element contains the hardware device property - auto value_vec = split(it_device_nstreams->second, ' '); - if (value_vec.size() == 1) { - key = ov::num_streams.name(); - device_config[key] = it_device_nstreams->second; - } else { - // set device nstreams properties in the AUTO/MULTI plugin - std::stringstream strm(it_device_nstreams->second); - std::map devices_property; - ov::util::Read>{}(strm, devices_property); - for (const auto& it : devices_property) { - if (device_config.find(it.first) == device_config.end() || - (is_load_config && is_dev_set_property[it.first])) { - // Create ov::device::properties with ov::num_stream and - // 1. Insert this ov::device::properties into device config if this - // ov::device::properties isn't existed. Otherwise, - // 2. Replace the existed ov::device::properties within device config. - is_dev_set_property[it.first] = false; - device_config.erase(it.first); - device_config.insert( - ov::device::properties(it.first, ov::num_streams(std::stoi(it.second)))); - } else { - auto& property = device_config[it.first].as(); - property.emplace(ov::num_streams(std::stoi(it.second))); - } - } - } + } else if (is_virtual_device(device)) { + key = ov::num_streams.name(); + update_device_config_for_virtual_device(it_device_nstreams->second, + device_config, + ov::num_streams, + is_dev_set_property, + is_load_config); } else { throw std::logic_error("Device " + device + " doesn't support config key '" + key + "' " + "and '" + ov::num_streams.name() + "'!" + @@ -477,7 +458,7 @@ int main(int argc, char* argv[]) { // Use API 2.0 key for streams key = ov::num_streams.name(); device_config[key] = ov::streams::AUTO; - } else if (device == "MULTI" || device == "AUTO") { + } else if (is_virtual_device(device)) { // Set nstreams to default value auto if no nstreams specified from cmd line. 
for (auto& hwdevice : hardware_devices) { std::string key = std::string(getDeviceTypeFromName(hwdevice) + "_THROUGHPUT_STREAMS"); @@ -502,34 +483,12 @@ int main(int argc, char* argv[]) { // set to user defined value if (supported(ov::inference_precision.name())) { device_config.emplace(ov::inference_precision(it_device_infer_precision->second)); - } else if (device == "MULTI" || device == "AUTO") { - // check if the element contains the hardware device property - auto value_vec = split(it_device_infer_precision->second, ' '); - if (value_vec.size() == 1) { - auto key = ov::inference_precision.name(); - device_config[key] = it_device_infer_precision->second; - } else { - // set device inference_precison properties in the AUTO/MULTI plugin - std::stringstream strm(it_device_infer_precision->second); - std::map devices_property; - ov::util::Read>{}(strm, devices_property); - for (const auto& it : devices_property) { - if (device_config.find(it.first) == device_config.end() || - (is_load_config && is_dev_set_property[it.first])) { - // Create ov::device::properties with ov::inference_precision and - // 1. Insert this ov::device::properties into device config if this - // ov::device::properties isn't existed. Otherwise, - // 2. Replace the existed ov::device::properties within device config. - is_dev_set_property[it.first] = false; - device_config.erase(it.first); - device_config.insert( - ov::device::properties(it.first, ov::inference_precision(it.second))); - } else { - auto& property = device_config[it.first].as(); - property.emplace(ov::inference_precision(it.second)); - } - } - } + } else if (is_virtual_device(device)) { + update_device_config_for_virtual_device(it_device_infer_precision->second, + device_config, + ov::inference_precision, + is_dev_set_property, + is_load_config); } else { throw std::logic_error("Device " + device + " doesn't support config key '" + ov::inference_precision.name() + "'! " + @@ -556,7 +515,7 @@ int main(int argc, char* argv[]) { if (supported(property_name) || device_name == "AUTO") { // create nthreads/pin primary property for HW device or AUTO if -d is AUTO directly. device_config.emplace(property); - } else if (if_auto || if_multi) { + } else if (is_virtual) { // Create secondary property of -nthreads/-pin only for CPU if CPU device appears in the devices // list specified by -d. for (auto& device : hardware_devices) { @@ -571,38 +530,10 @@ int main(int argc, char* argv[]) { if (isFlagSetInCommandLine("pin")) set_nthreads_pin("pin"); - if (device.find("CPU") != std::string::npos || device.find("GPU") != std::string::npos) { - // CPU supports few special performance-oriented keys - // for CPU and GPU execution, more throughput-oriented execution via streams - setThroughputStreams(); - set_infer_precision(); - } else if (device.find("GNA") != std::string::npos) { - set_infer_precision(); - } else if (device.find("AUTO") != std::string::npos) { - setThroughputStreams(); - set_infer_precision(); - device_nstreams.erase(device); - } else if (device.find("MULTI") != std::string::npos) { - setThroughputStreams(); - set_infer_precision(); - if ((device_name.find("GPU") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) { - slog::warn << "GPU throttling is turned on. Multi-device execution with " - "the CPU + GPU performs best with GPU throttling hint, " - << "which releases another CPU thread (that is otherwise " - "used by the GPU driver for active polling)." 
- << slog::endl; + set_throughput_streams(); + set_infer_precision(); - device_config.insert(ov::device::properties("GPU", {{GPU_CONFIG_KEY(PLUGIN_THROTTLE), 1}})); - // limit threading for CPU portion of inference - if (!isFlagSetInCommandLine("pin")) { - auto it_affinity = device_config.find(ov::affinity.name()); - if (it_affinity != device_config.end()) { - slog::warn << "Turn off threads pinning for " << device - << " device since multi-scenario with GPU device is used." << slog::endl; - it_affinity->second = ov::Affinity::NONE; - } - } - } + if (is_virtual_device(device)) { device_nstreams.erase(device); } } @@ -905,7 +836,21 @@ int main(int argc, char* argv[]) { if (cfg == ov::supported_properties) continue; auto prop = compiledModel.get_property(cfg); - slog::info << " " << cfg << ": " << prop.as() << slog::endl; + if (cfg == ov::device::properties) { + auto devices_properties = prop.as(); + for (auto& item : devices_properties) { + slog::info << " " << item.first << ": " << slog::endl; + for (auto& item2 : item.second.as()) { + if (item2.first == ov::supported_properties || + item2.first == METRIC_KEY(SUPPORTED_CONFIG_KEYS) || + item2.first == METRIC_KEY(SUPPORTED_METRICS)) + continue; + slog::info << " " << item2.first << ": " << item2.second.as() << slog::endl; + } + } + } else { + slog::info << " " << cfg << ": " << prop.as() << slog::endl; + } } // Update number of streams diff --git a/samples/cpp/benchmark_app/remote_tensors_filling.cpp b/samples/cpp/benchmark_app/remote_tensors_filling.cpp index fa139d7485f141..9301a8d113210f 100644 --- a/samples/cpp/benchmark_app/remote_tensors_filling.cpp +++ b/samples/cpp/benchmark_app/remote_tensors_filling.cpp @@ -40,8 +40,10 @@ void fill_buffer_random(void* inputBuffer, void fill_buffer(void* inputBuffer, size_t elementsNum, const ov::element::Type& type) { if (type == ov::element::f32) { fill_buffer_random(inputBuffer, elementsNum); + } else if (type == ov::element::f64) { + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::f16) { - fill_buffer_random(inputBuffer, elementsNum); + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i32) { fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i64) { diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp index 6728b8438b7ad4..8c53f3d192430a 100644 --- a/samples/cpp/benchmark_app/utils.cpp +++ b/samples/cpp/benchmark_app/utils.cpp @@ -107,13 +107,27 @@ std::vector split_float(const std::string& s, char delim) { return result; } +static const std::vector meta_plugins{"MULTI", "HETERO", "AUTO"}; +bool is_virtual_device(const std::string& device_name) { + return std::find(meta_plugins.begin(), meta_plugins.end(), device_name) != meta_plugins.end(); +} + +bool is_virtual_device_found(const std::vector& device_names) { + for (const auto& device_name : device_names) { + if (is_virtual_device(device_name)) { + return true; + } + } + return false; +} + std::vector parse_devices(const std::string& device_string) { std::string comma_separated_devices = device_string; auto colon = comma_separated_devices.find(":"); std::vector result; if (colon != std::string::npos) { auto target_device = comma_separated_devices.substr(0, colon); - if (target_device == "AUTO" || target_device == "MULTI") { + if (is_virtual_device(target_device)) { result.push_back(target_device); } auto bracket = comma_separated_devices.find("("); // e.g. 
in BATCH:GPU(4) @@ -137,8 +151,8 @@ void parse_value_for_virtual_device(const std::string& device, std::mapsecond; values_string.clear(); @@ -146,23 +160,92 @@ void parse_value_for_virtual_device(const std::string& device, std::mapfirst == device) { iter++; continue; } - values_string[device] += iter->first + " " + iter->second + " "; + if (ss.str().empty()) + ss << '{'; + else + ss << ','; + ss << iter->first << ":" << iter->second; iter = values_string.erase(iter); } - if (values_string.find(device) != values_string.end()) { - auto& nstreams = values_string[device]; - // Remove the space at the tail. - nstreams.pop_back(); + if (!ss.str().empty()) { + ss << '}'; + values_string[device] = ss.str(); } return; } +template +void update_device_config_for_virtual_device(const std::string& value, + ov::AnyMap& device_config, + ov::Property property, + std::map& is_dev_set_property, + bool is_load_config) { + // check if the element contains the hardware device property + if (split(value, ':').size() == 1) { + device_config[property.name()] = value; + } else { + // set device nstreams properties in the AUTO/MULTI/HETERO plugin + std::stringstream strm(value); + std::map devices_property; + ov::util::Read>{}(strm, devices_property); + for (const auto& it : devices_property) { + const auto& device_name = it.first; + const auto& device_value = it.second; + if (device_config.find(ov::device::properties.name()) == device_config.end() || + (is_load_config && is_dev_set_property[device_name])) { + // Create ov::device::properties with ov::num_stream/ov::inference_precision and + // 1. Insert this ov::device::properties into device config if this + // ov::device::properties isn't existed. Otherwise, + // 2. Replace the existed ov::device::properties within device config. 
+ is_dev_set_property[device_name] = false; + device_config.erase(device_name); + device_config[ov::device::properties.name()] = ov::AnyMap{}; + auto& secondary_property = device_config.at(ov::device::properties.name()).as(); + secondary_property[device_name] = ov::AnyMap{{property.name(), device_value}}; + } else { + auto& secondary_property = device_config.at(ov::device::properties.name()).as(); + if (secondary_property.count(device_name)) { + auto& device_property = secondary_property.at(device_name).as(); + device_property.emplace(property(device_value)); + } else { + secondary_property[device_name] = ov::AnyMap{{property.name(), device_value}}; + } + } + } + } +} + +void update_device_config_for_virtual_device(const std::string& value, + ov::AnyMap& device_config, + ov::Property property, + std::map& is_dev_set_property, + bool is_load_config) { + return update_device_config_for_virtual_device(value, + device_config, + property, + is_dev_set_property, + is_load_config); +} + +void update_device_config_for_virtual_device(const std::string& value, + ov::AnyMap& device_config, + ov::Property property, + std::map& is_dev_set_property, + bool is_load_config) { + return update_device_config_for_virtual_device(value, + device_config, + property, + is_dev_set_property, + is_load_config); +} + std::map parse_value_per_device(const std::vector& devices, const std::string& values_string) { // Format: :,: or just @@ -691,27 +774,12 @@ void dump_config(const std::string& filename, const std::map device_properties; for (const auto& option : item.second) { - if (option.second.is()) { - // hw device properties - device_properties[option.first] = option.second.as(); - } else { - // primary property - std::stringstream strm; - option.second.print(strm); - auto property_string = strm.str(); - jsonConfig[deviceName][option.first] = property_string; - } - if (!device_properties.empty()) { - for (auto& item : device_properties) { - auto hw_device_name = item.first; - for (auto& property : item.second) { - jsonConfig[deviceName]["DEVICE_PROPERTIES"][hw_device_name][property.first] = - property.second.as(); - } - } - } + // primary property + std::stringstream strm; + option.second.print(strm); + auto property_string = strm.str(); + jsonConfig[deviceName][option.first] = property_string; } } @@ -740,23 +808,7 @@ void load_config(const std::string& filename, std::map& const std::string& deviceName = item.key(); const auto& itemValue = item.value(); for (auto option = itemValue.cbegin(), itemValueEnd = itemValue.cend(); option != itemValueEnd; ++option) { - if (option.key() != "DEVICE_PROPERTIES") { - config[deviceName][option.key()] = option.value().get(); - continue; - } - const auto& optionValue = option.value(); - for (auto hw_properties = optionValue.cbegin(), optionValueEnd = optionValue.cend(); - hw_properties != optionValueEnd; - ++hw_properties) { - const std::string& hw_device_name = hw_properties.key(); - std::map hw_device_properties; - const auto& hw_propertiesValue = hw_properties.value(); - for (auto property = hw_propertiesValue.cbegin(), hw_propertiesEnd = hw_propertiesValue.cend(); - property != hw_propertiesEnd; - ++property) - hw_device_properties[property.key()] = property.value().get(); - config[deviceName][hw_device_name] = hw_device_properties; - } + config[deviceName][option.key()] = option.value().get(); } } } diff --git a/samples/cpp/benchmark_app/utils.hpp b/samples/cpp/benchmark_app/utils.hpp index 9c89e2d4df5d03..406500d3c36858 100644 --- a/samples/cpp/benchmark_app/utils.hpp +++ 
b/samples/cpp/benchmark_app/utils.hpp @@ -58,11 +58,19 @@ using InputsInfo = std::map; using PartialShapes = std::map; } // namespace benchmark_app +bool is_virtual_device(const std::string& device_name); +bool is_virtual_device_found(const std::vector& device_names); std::vector parse_devices(const std::string& device_string); uint32_t device_default_device_duration_in_seconds(const std::string& device); std::map parse_value_per_device(const std::vector& devices, const std::string& values_string); void parse_value_for_virtual_device(const std::string& device, std::map& values_string); +template +void update_device_config_for_virtual_device(const std::string& value, + ov::AnyMap& device_config, + ov::Property property, + std::map& is_dev_set_property, + bool is_load_config = false); std::string get_shapes_string(const benchmark_app::PartialShapes& shapes); size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info); std::vector split(const std::string& s, char delim); diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index e913d71411adcb..7553ee7bf221c7 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -255,7 +255,11 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 2. Loading model to the device ------------------------------------------ if (useGna) { - genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig)); + if (useHetero) { + genericPluginConfig.insert(ov::device::properties("GNA", gnaPluginConfig)); + } else { + genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig)); + } } auto t0 = Time::now(); ms loadTime = std::chrono::duration_cast(Time::now() - t0); diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index 41789160e69a59..3cdf4987d732b7 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -36,15 +36,15 @@ if [ -e "$INSTALLDIR/runtime" ]; then export OpenVINO_DIR=$INSTALLDIR/runtime/cmake system_type=$(ls "$INSTALLDIR/runtime/lib/") - IE_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type + OV_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=${IE_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export DYLD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=${OV_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} else - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=$IE_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=$OV_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} fi if [ -e "$INSTALLDIR/runtime/3rdparty/tbb" ]; then diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index 
b00f72aaedafc2..54c887435c5cd6 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -171,3 +171,17 @@ ov_property_key_enable_profiling; */ OPENVINO_C_VAR(const char*) ov_property_key_device_priorities; + +/** + * @brief Read-write property for the high-level OpenVINO execution mode hint. + * Unlike low-level properties, which are individual (per-device), hints are something that every device accepts + * and turns into device-specific settings. + * The execution mode hint controls the preferred optimization target (performance or accuracy) for the given model. + * It can be set to one of the values below: + * "UNDEFINED" //!< Undefined value, settings may vary from device to device + * "PERFORMANCE", //!< Optimize for max performance + * "ACCURACY", //!< Optimize for max accuracy + * @ingroup ov_property_c_api + */ +OPENVINO_C_VAR(const char*) +ov_property_key_hint_execution_mode; diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 613d52b376a228..2d6c470ae5df1d 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -29,3 +29,4 @@ const char* ov_property_key_hint_model_priority = "MODEL_PRIORITY"; const char* ov_property_key_log_level = "LOG_LEVEL"; const char* ov_property_key_enable_profiling = "PERF_COUNT"; const char* ov_property_key_device_priorities = "MULTI_DEVICE_PRIORITIES"; +const char* ov_property_key_hint_execution_mode = "EXECUTION_MODE_HINT"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 6804504c94053f..0cb2f29f65e878 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -147,6 +147,27 @@ TEST_P(ov_core_test, ov_core_compile_model_with_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_compile_model_with_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + ov_model_t* model = nullptr; + OV_EXPECT_OK(ov_core_read_model(core, xml_file_name.c_str(), nullptr, &model)); + EXPECT_NE(nullptr, model); + + ov_compiled_model_t* compiled_model = nullptr; + const char* key = ov_property_key_hint_execution_mode; + const char* value = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_compile_model(core, model, device_name.c_str(), 2, &compiled_model, key, value)); + EXPECT_NE(nullptr, compiled_model); + + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_compile_model_with_property_invalid) { auto device_name = GetParam(); ov_core_t* core = nullptr; @@ -306,6 +327,30 @@ TEST_P(ov_core_test, ov_core_get_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_set_and_get_property_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + const char* key = ov_property_key_hint_execution_mode; + char* property_value = nullptr; + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + ov_free(property_value); + + const char* value1 = "ACCURACY"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value1)); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value1, property_value); + + const char* value2 = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value2)); + 
OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value2, property_value); + + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_set_get_property_str) { #ifdef __aarch64__ GTEST_SKIP() << "Skip this test for ARM CPU for now, cause no string property supported"; diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e83f59eb8b3ae7..968d95b8760bed 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1 +1,2 @@ numpy>=1.16.6 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 530a28b3bf1e16..2bd82fb628bc26 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -40,3 +40,4 @@ types-pkg_resources wheel>=0.38.1 protobuf~=3.18.1 numpy>=1.16.6,<=1.23.4 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/setup.py b/src/bindings/python/setup.py index 021e37875e32da..6205f70aeb4e04 100644 --- a/src/bindings/python/setup.py +++ b/src/bindings/python/setup.py @@ -55,6 +55,7 @@ "openvino.runtime.opset8", "openvino.runtime.opset9", "openvino.runtime.opset10", + "openvino.runtime.opset11", "openvino.runtime.utils", "openvino.runtime.op", "openvino.runtime.op.util", diff --git a/src/bindings/python/src/compatibility/ngraph/__init__.py b/src/bindings/python/src/compatibility/ngraph/__init__.py index 3109d9e3d1d525..d80f2199dfc5d0 100644 --- a/src/bindings/python/src/compatibility/ngraph/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/__init__.py @@ -18,180 +18,180 @@ from ngraph.impl import PartialShape from ngraph.helpers import function_from_cnn from ngraph.helpers import function_to_cnn -from ngraph.opset10 import absolute -from ngraph.opset10 import absolute as abs -from ngraph.opset10 import acos -from ngraph.opset10 import acosh -from ngraph.opset10 import adaptive_avg_pool -from ngraph.opset10 import adaptive_max_pool -from ngraph.opset10 import add -from ngraph.opset10 import asin -from ngraph.opset10 import asinh -from ngraph.opset10 import assign -from ngraph.opset10 import atan -from ngraph.opset10 import atanh -from ngraph.opset10 import avg_pool -from ngraph.opset10 import batch_norm_inference -from ngraph.opset10 import batch_to_space -from ngraph.opset10 import binary_convolution -from ngraph.opset10 import broadcast -from ngraph.opset10 import bucketize -from ngraph.opset10 import ceiling -from ngraph.opset10 import ceiling as ceil -from ngraph.opset10 import clamp -from ngraph.opset10 import concat -from ngraph.opset10 import constant -from ngraph.opset10 import convert -from ngraph.opset10 import convert_like -from ngraph.opset10 import convolution -from ngraph.opset10 import convolution_backprop_data -from ngraph.opset10 import cos -from ngraph.opset10 import cosh -from ngraph.opset10 import ctc_greedy_decoder -from ngraph.opset10 import ctc_greedy_decoder_seq_len -from ngraph.opset10 import ctc_loss -from ngraph.opset10 import cum_sum -from ngraph.opset10 import cum_sum as cumsum -from ngraph.opset10 import deformable_convolution -from ngraph.opset10 import deformable_psroi_pooling -from ngraph.opset10 import depth_to_space -from ngraph.opset10 import detection_output -from ngraph.opset10 import dft -from ngraph.opset10 import divide -from ngraph.opset10 import einsum -from ngraph.opset10 import elu -from ngraph.opset10 import embedding_bag_offsets_sum -from 
ngraph.opset10 import embedding_bag_packed_sum -from ngraph.opset10 import embedding_segments_sum -from ngraph.opset10 import extract_image_patches -from ngraph.opset10 import equal -from ngraph.opset10 import erf -from ngraph.opset10 import exp -from ngraph.opset10 import eye -from ngraph.opset10 import fake_quantize -from ngraph.opset10 import floor -from ngraph.opset10 import floor_mod -from ngraph.opset10 import gather -from ngraph.opset10 import gather_elements -from ngraph.opset10 import gather_nd -from ngraph.opset10 import gather_tree -from ngraph.opset10 import gelu -from ngraph.opset10 import generate_proposals -from ngraph.opset10 import greater -from ngraph.opset10 import greater_equal -from ngraph.opset10 import grid_sample -from ngraph.opset10 import grn -from ngraph.opset10 import group_convolution -from ngraph.opset10 import group_convolution_backprop_data -from ngraph.opset10 import gru_cell -from ngraph.opset10 import gru_sequence -from ngraph.opset10 import hard_sigmoid -from ngraph.opset10 import hsigmoid -from ngraph.opset10 import hswish -from ngraph.opset10 import idft -from ngraph.opset10 import if_op -from ngraph.opset10 import interpolate -from ngraph.opset10 import irdft -from ngraph.opset10 import is_finite -from ngraph.opset10 import is_inf -from ngraph.opset10 import is_nan -from ngraph.opset10 import i420_to_bgr -from ngraph.opset10 import i420_to_rgb -from ngraph.opset10 import less -from ngraph.opset10 import less_equal -from ngraph.opset10 import log -from ngraph.opset10 import logical_and -from ngraph.opset10 import logical_not -from ngraph.opset10 import logical_or -from ngraph.opset10 import logical_xor -from ngraph.opset10 import log_softmax -from ngraph.opset10 import loop -from ngraph.opset10 import lrn -from ngraph.opset10 import lstm_cell -from ngraph.opset10 import lstm_sequence -from ngraph.opset10 import matmul -from ngraph.opset10 import matrix_nms -from ngraph.opset10 import max_pool -from ngraph.opset10 import maximum -from ngraph.opset10 import minimum -from ngraph.opset10 import mish -from ngraph.opset10 import mod -from ngraph.opset10 import multiclass_nms -from ngraph.opset10 import multiply -from ngraph.opset10 import mvn -from ngraph.opset10 import negative -from ngraph.opset10 import non_max_suppression -from ngraph.opset10 import non_zero -from ngraph.opset10 import normalize_l2 -from ngraph.opset10 import not_equal -from ngraph.opset10 import nv12_to_bgr -from ngraph.opset10 import nv12_to_rgb -from ngraph.opset10 import one_hot -from ngraph.opset10 import pad -from ngraph.opset10 import parameter -from ngraph.opset10 import power -from ngraph.opset10 import prelu -from ngraph.opset10 import prior_box -from ngraph.opset10 import prior_box_clustered -from ngraph.opset10 import psroi_pooling -from ngraph.opset10 import proposal -from ngraph.opset10 import random_uniform -from ngraph.opset10 import range -from ngraph.opset10 import rdft -from ngraph.opset10 import read_value -from ngraph.opset10 import reduce_l1 -from ngraph.opset10 import reduce_l2 -from ngraph.opset10 import reduce_logical_and -from ngraph.opset10 import reduce_logical_or -from ngraph.opset10 import reduce_max -from ngraph.opset10 import reduce_mean -from ngraph.opset10 import reduce_min -from ngraph.opset10 import reduce_prod -from ngraph.opset10 import reduce_sum -from ngraph.opset10 import region_yolo -from ngraph.opset10 import reorg_yolo -from ngraph.opset10 import relu -from ngraph.opset10 import reshape -from ngraph.opset10 import result -from ngraph.opset10 
import reverse_sequence -from ngraph.opset10 import rnn_cell -from ngraph.opset10 import rnn_sequence -from ngraph.opset10 import roi_align -from ngraph.opset10 import roi_pooling -from ngraph.opset10 import roll -from ngraph.opset10 import round -from ngraph.opset10 import scatter_elements_update -from ngraph.opset10 import scatter_update -from ngraph.opset10 import select -from ngraph.opset10 import selu -from ngraph.opset10 import shape_of -from ngraph.opset10 import shuffle_channels -from ngraph.opset10 import sigmoid -from ngraph.opset10 import sign -from ngraph.opset10 import sin -from ngraph.opset10 import sinh -from ngraph.opset10 import slice -from ngraph.opset10 import softmax -from ngraph.opset10 import softplus -from ngraph.opset10 import softsign -from ngraph.opset10 import space_to_batch -from ngraph.opset10 import space_to_depth -from ngraph.opset10 import split -from ngraph.opset10 import sqrt -from ngraph.opset10 import squared_difference -from ngraph.opset10 import squeeze -from ngraph.opset10 import strided_slice -from ngraph.opset10 import subtract -from ngraph.opset10 import swish -from ngraph.opset10 import tan -from ngraph.opset10 import tanh -from ngraph.opset10 import tensor_iterator -from ngraph.opset10 import tile -from ngraph.opset10 import topk -from ngraph.opset10 import transpose -from ngraph.opset10 import unique -from ngraph.opset10 import unsqueeze -from ngraph.opset10 import variadic_split +from ngraph.opset11 import absolute +from ngraph.opset11 import absolute as abs +from ngraph.opset11 import acos +from ngraph.opset11 import acosh +from ngraph.opset11 import adaptive_avg_pool +from ngraph.opset11 import adaptive_max_pool +from ngraph.opset11 import add +from ngraph.opset11 import asin +from ngraph.opset11 import asinh +from ngraph.opset11 import assign +from ngraph.opset11 import atan +from ngraph.opset11 import atanh +from ngraph.opset11 import avg_pool +from ngraph.opset11 import batch_norm_inference +from ngraph.opset11 import batch_to_space +from ngraph.opset11 import binary_convolution +from ngraph.opset11 import broadcast +from ngraph.opset11 import bucketize +from ngraph.opset11 import ceiling +from ngraph.opset11 import ceiling as ceil +from ngraph.opset11 import clamp +from ngraph.opset11 import concat +from ngraph.opset11 import constant +from ngraph.opset11 import convert +from ngraph.opset11 import convert_like +from ngraph.opset11 import convolution +from ngraph.opset11 import convolution_backprop_data +from ngraph.opset11 import cos +from ngraph.opset11 import cosh +from ngraph.opset11 import ctc_greedy_decoder +from ngraph.opset11 import ctc_greedy_decoder_seq_len +from ngraph.opset11 import ctc_loss +from ngraph.opset11 import cum_sum +from ngraph.opset11 import cum_sum as cumsum +from ngraph.opset11 import deformable_convolution +from ngraph.opset11 import deformable_psroi_pooling +from ngraph.opset11 import depth_to_space +from ngraph.opset11 import detection_output +from ngraph.opset11 import dft +from ngraph.opset11 import divide +from ngraph.opset11 import einsum +from ngraph.opset11 import elu +from ngraph.opset11 import embedding_bag_offsets_sum +from ngraph.opset11 import embedding_bag_packed_sum +from ngraph.opset11 import embedding_segments_sum +from ngraph.opset11 import extract_image_patches +from ngraph.opset11 import equal +from ngraph.opset11 import erf +from ngraph.opset11 import exp +from ngraph.opset11 import eye +from ngraph.opset11 import fake_quantize +from ngraph.opset11 import floor +from ngraph.opset11 import 
floor_mod +from ngraph.opset11 import gather +from ngraph.opset11 import gather_elements +from ngraph.opset11 import gather_nd +from ngraph.opset11 import gather_tree +from ngraph.opset11 import gelu +from ngraph.opset11 import generate_proposals +from ngraph.opset11 import greater +from ngraph.opset11 import greater_equal +from ngraph.opset11 import grid_sample +from ngraph.opset11 import grn +from ngraph.opset11 import group_convolution +from ngraph.opset11 import group_convolution_backprop_data +from ngraph.opset11 import gru_cell +from ngraph.opset11 import gru_sequence +from ngraph.opset11 import hard_sigmoid +from ngraph.opset11 import hsigmoid +from ngraph.opset11 import hswish +from ngraph.opset11 import idft +from ngraph.opset11 import if_op +from ngraph.opset11 import interpolate +from ngraph.opset11 import irdft +from ngraph.opset11 import is_finite +from ngraph.opset11 import is_inf +from ngraph.opset11 import is_nan +from ngraph.opset11 import i420_to_bgr +from ngraph.opset11 import i420_to_rgb +from ngraph.opset11 import less +from ngraph.opset11 import less_equal +from ngraph.opset11 import log +from ngraph.opset11 import logical_and +from ngraph.opset11 import logical_not +from ngraph.opset11 import logical_or +from ngraph.opset11 import logical_xor +from ngraph.opset11 import log_softmax +from ngraph.opset11 import loop +from ngraph.opset11 import lrn +from ngraph.opset11 import lstm_cell +from ngraph.opset11 import lstm_sequence +from ngraph.opset11 import matmul +from ngraph.opset11 import matrix_nms +from ngraph.opset11 import max_pool +from ngraph.opset11 import maximum +from ngraph.opset11 import minimum +from ngraph.opset11 import mish +from ngraph.opset11 import mod +from ngraph.opset11 import multiclass_nms +from ngraph.opset11 import multiply +from ngraph.opset11 import mvn +from ngraph.opset11 import negative +from ngraph.opset11 import non_max_suppression +from ngraph.opset11 import non_zero +from ngraph.opset11 import normalize_l2 +from ngraph.opset11 import not_equal +from ngraph.opset11 import nv12_to_bgr +from ngraph.opset11 import nv12_to_rgb +from ngraph.opset11 import one_hot +from ngraph.opset11 import pad +from ngraph.opset11 import parameter +from ngraph.opset11 import power +from ngraph.opset11 import prelu +from ngraph.opset11 import prior_box +from ngraph.opset11 import prior_box_clustered +from ngraph.opset11 import psroi_pooling +from ngraph.opset11 import proposal +from ngraph.opset11 import random_uniform +from ngraph.opset11 import range +from ngraph.opset11 import rdft +from ngraph.opset11 import read_value +from ngraph.opset11 import reduce_l1 +from ngraph.opset11 import reduce_l2 +from ngraph.opset11 import reduce_logical_and +from ngraph.opset11 import reduce_logical_or +from ngraph.opset11 import reduce_max +from ngraph.opset11 import reduce_mean +from ngraph.opset11 import reduce_min +from ngraph.opset11 import reduce_prod +from ngraph.opset11 import reduce_sum +from ngraph.opset11 import region_yolo +from ngraph.opset11 import reorg_yolo +from ngraph.opset11 import relu +from ngraph.opset11 import reshape +from ngraph.opset11 import result +from ngraph.opset11 import reverse_sequence +from ngraph.opset11 import rnn_cell +from ngraph.opset11 import rnn_sequence +from ngraph.opset11 import roi_align +from ngraph.opset11 import roi_pooling +from ngraph.opset11 import roll +from ngraph.opset11 import round +from ngraph.opset11 import scatter_elements_update +from ngraph.opset11 import scatter_update +from ngraph.opset11 import select +from 
ngraph.opset11 import selu +from ngraph.opset11 import shape_of +from ngraph.opset11 import shuffle_channels +from ngraph.opset11 import sigmoid +from ngraph.opset11 import sign +from ngraph.opset11 import sin +from ngraph.opset11 import sinh +from ngraph.opset11 import slice +from ngraph.opset11 import softmax +from ngraph.opset11 import softplus +from ngraph.opset11 import softsign +from ngraph.opset11 import space_to_batch +from ngraph.opset11 import space_to_depth +from ngraph.opset11 import split +from ngraph.opset11 import sqrt +from ngraph.opset11 import squared_difference +from ngraph.opset11 import squeeze +from ngraph.opset11 import strided_slice +from ngraph.opset11 import subtract +from ngraph.opset11 import swish +from ngraph.opset11 import tan +from ngraph.opset11 import tanh +from ngraph.opset11 import tensor_iterator +from ngraph.opset11 import tile +from ngraph.opset11 import topk +from ngraph.opset11 import transpose +from ngraph.opset11 import unique +from ngraph.opset11 import unsqueeze +from ngraph.opset11 import variadic_split # Extend Node class to support binary operators diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py new file mode 100644 index 00000000000000..91f84b81f415cd --- /dev/null +++ b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py @@ -0,0 +1,177 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from ngraph.opset1.ops import absolute +from ngraph.opset1.ops import absolute as abs +from ngraph.opset1.ops import acos +from ngraph.opset4.ops import acosh +from ngraph.opset8.ops import adaptive_avg_pool +from ngraph.opset8.ops import adaptive_max_pool +from ngraph.opset1.ops import add +from ngraph.opset1.ops import asin +from ngraph.opset4.ops import asinh +from ngraph.opset3.ops import assign +from ngraph.opset1.ops import atan +from ngraph.opset4.ops import atanh +from ngraph.opset1.ops import avg_pool +from ngraph.opset5.ops import batch_norm_inference +from ngraph.opset2.ops import batch_to_space +from ngraph.opset1.ops import binary_convolution +from ngraph.opset3.ops import broadcast +from ngraph.opset3.ops import bucketize +from ngraph.opset1.ops import ceiling +from ngraph.opset1.ops import ceiling as ceil +from ngraph.opset1.ops import clamp +from ngraph.opset1.ops import concat +from ngraph.opset1.ops import constant +from ngraph.opset1.ops import convert +from ngraph.opset1.ops import convert_like +from ngraph.opset1.ops import convolution +from ngraph.opset1.ops import convolution_backprop_data +from ngraph.opset1.ops import cos +from ngraph.opset1.ops import cosh +from ngraph.opset1.ops import ctc_greedy_decoder +from ngraph.opset6.ops import ctc_greedy_decoder_seq_len +from ngraph.opset4.ops import ctc_loss +from ngraph.opset3.ops import cum_sum +from ngraph.opset3.ops import cum_sum as cumsum +from ngraph.opset8.ops import deformable_convolution +from ngraph.opset1.ops import deformable_psroi_pooling +from ngraph.opset1.ops import depth_to_space +from ngraph.opset8.ops import detection_output +from ngraph.opset7.ops import dft +from ngraph.opset1.ops import divide +from ngraph.opset7.ops import einsum +from ngraph.opset1.ops import elu +from ngraph.opset3.ops import embedding_bag_offsets_sum +from ngraph.opset3.ops import embedding_bag_packed_sum +from ngraph.opset3.ops import embedding_segments_sum +from ngraph.opset3.ops import extract_image_patches +from ngraph.opset1.ops import equal 
+from ngraph.opset1.ops import erf +from ngraph.opset1.ops import exp +from ngraph.opset9.ops import eye +from ngraph.opset1.ops import fake_quantize +from ngraph.opset1.ops import floor +from ngraph.opset1.ops import floor_mod +from ngraph.opset8.ops import gather +from ngraph.opset6.ops import gather_elements +from ngraph.opset8.ops import gather_nd +from ngraph.opset1.ops import gather_tree +from ngraph.opset7.ops import gelu +from ngraph.opset9.ops import generate_proposals +from ngraph.opset1.ops import greater +from ngraph.opset1.ops import greater_equal +from ngraph.opset9.ops import grid_sample +from ngraph.opset1.ops import grn +from ngraph.opset1.ops import group_convolution +from ngraph.opset1.ops import group_convolution_backprop_data +from ngraph.opset3.ops import gru_cell +from ngraph.opset5.ops import gru_sequence +from ngraph.opset1.ops import hard_sigmoid +from ngraph.opset5.ops import hsigmoid +from ngraph.opset4.ops import hswish +from ngraph.opset7.ops import idft +from ngraph.opset8.ops import if_op +from ngraph.opset11.ops import interpolate +from ngraph.opset9.ops import irdft +from ngraph.opset10.ops import is_finite +from ngraph.opset10.ops import is_inf +from ngraph.opset10.ops import is_nan +from ngraph.opset8.ops import i420_to_bgr +from ngraph.opset8.ops import i420_to_rgb +from ngraph.opset1.ops import less +from ngraph.opset1.ops import less_equal +from ngraph.opset1.ops import log +from ngraph.opset1.ops import logical_and +from ngraph.opset1.ops import logical_not +from ngraph.opset1.ops import logical_or +from ngraph.opset1.ops import logical_xor +from ngraph.opset5.ops import log_softmax +from ngraph.opset5.ops import loop +from ngraph.opset1.ops import lrn +from ngraph.opset4.ops import lstm_cell +from ngraph.opset5.ops import lstm_sequence +from ngraph.opset1.ops import matmul +from ngraph.opset8.ops import matrix_nms +from ngraph.opset8.ops import max_pool +from ngraph.opset1.ops import maximum +from ngraph.opset1.ops import minimum +from ngraph.opset4.ops import mish +from ngraph.opset1.ops import mod +from ngraph.opset9.ops import multiclass_nms +from ngraph.opset1.ops import multiply +from ngraph.opset6.ops import mvn +from ngraph.opset1.ops import negative +from ngraph.opset9.ops import non_max_suppression +from ngraph.opset3.ops import non_zero +from ngraph.opset1.ops import normalize_l2 +from ngraph.opset1.ops import not_equal +from ngraph.opset8.ops import nv12_to_bgr +from ngraph.opset8.ops import nv12_to_rgb +from ngraph.opset1.ops import one_hot +from ngraph.opset1.ops import pad +from ngraph.opset1.ops import parameter +from ngraph.opset1.ops import power +from ngraph.opset1.ops import prelu +from ngraph.opset8.ops import prior_box +from ngraph.opset1.ops import prior_box_clustered +from ngraph.opset1.ops import psroi_pooling +from ngraph.opset4.ops import proposal +from ngraph.opset8.ops import random_uniform +from ngraph.opset1.ops import range +from ngraph.opset9.ops import rdft +from ngraph.opset3.ops import read_value +from ngraph.opset4.ops import reduce_l1 +from ngraph.opset4.ops import reduce_l2 +from ngraph.opset1.ops import reduce_logical_and +from ngraph.opset1.ops import reduce_logical_or +from ngraph.opset1.ops import reduce_max +from ngraph.opset1.ops import reduce_mean +from ngraph.opset1.ops import reduce_min +from ngraph.opset1.ops import reduce_prod +from ngraph.opset1.ops import reduce_sum +from ngraph.opset1.ops import region_yolo +from ngraph.opset2.ops import reorg_yolo +from ngraph.opset1.ops import relu +from 
ngraph.opset1.ops import reshape +from ngraph.opset1.ops import result +from ngraph.opset1.ops import reverse_sequence +from ngraph.opset3.ops import rnn_cell +from ngraph.opset5.ops import rnn_sequence +from ngraph.opset9.ops import roi_align +from ngraph.opset2.ops import roi_pooling +from ngraph.opset7.ops import roll +from ngraph.opset5.ops import round +from ngraph.opset3.ops import scatter_elements_update +from ngraph.opset3.ops import scatter_update +from ngraph.opset1.ops import select +from ngraph.opset1.ops import selu +from ngraph.opset3.ops import shape_of +from ngraph.opset3.ops import shuffle_channels +from ngraph.opset1.ops import sigmoid +from ngraph.opset1.ops import sign +from ngraph.opset1.ops import sin +from ngraph.opset1.ops import sinh +from ngraph.opset8.ops import slice +from ngraph.opset8.ops import softmax +from ngraph.opset4.ops import softplus +from ngraph.opset9.ops import softsign +from ngraph.opset2.ops import space_to_batch +from ngraph.opset1.ops import space_to_depth +from ngraph.opset1.ops import split +from ngraph.opset1.ops import sqrt +from ngraph.opset1.ops import squared_difference +from ngraph.opset1.ops import squeeze +from ngraph.opset1.ops import strided_slice +from ngraph.opset1.ops import subtract +from ngraph.opset4.ops import swish +from ngraph.opset1.ops import tan +from ngraph.opset1.ops import tanh +from ngraph.opset1.ops import tensor_iterator +from ngraph.opset1.ops import tile +from ngraph.opset3.ops import topk +from ngraph.opset1.ops import transpose +from ngraph.opset10.ops import unique +from ngraph.opset1.ops import unsqueeze +from ngraph.opset1.ops import variadic_split diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py new file mode 100644 index 00000000000000..434b778b246cf8 --- /dev/null +++ b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Factory functions for all openvino ops.""" +from functools import partial +from typing import List, Optional + +from ngraph.impl import Node +from ngraph.opset_utils import _get_node_factory +from ngraph.utils.decorators import nameable_op +from ngraph.utils.types import ( + NodeInput, + as_nodes, +) + +_get_node_factory_opset11 = partial(_get_node_factory, "opset11") + +# -------------------------------------------- ops ------------------------------------------------ + + +@nameable_op +def interpolate( + image: NodeInput, + scales_or_sizes: NodeInput, + mode: str, + shape_calculation_mode: str, + pads_begin: Optional[List[int]] = None, + pads_end: Optional[List[int]] = None, + coordinate_transformation_mode: str = "half_pixel", + nearest_mode: str = "round_prefer_floor", + antialias: bool = False, + cube_coeff: float = -0.75, + axes: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Performs the interpolation of the input tensor. + + :param image: The node providing the input tensor with data for interpolation. + :param scales_or_sizes: + 1D tensor providing information used to calculate the output shape + of the operation. It may contain floats (scales) or integers (sizes). + :param mode: Specifies the type of interpolation. Possible values are: nearest, linear, + linear_onnx, cubic, bilinear_pillow, bicubic_pillow. + :param shape_calculation_mode: + Specifies how the scales_or_sizes input should be interpreted. 
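As an illustrative aside (not part of the patch): a minimal sketch of how the opset11 interpolate factory above might be called through the ngraph compatibility API. The input shape, scale factors, and the choice of the "scales" shape-calculation mode and "bilinear_pillow" interpolation mode are assumptions made only for this example.

import numpy as np
import ngraph as ng

# Build a small graph that upscales H and W by 2x with the new opset11 Interpolate.
image = ng.parameter([1, 3, 32, 32], dtype=np.float32, name="image")
scales = ng.constant(np.array([2.0, 2.0], dtype=np.float32))  # per-axis scale factors
axes = ng.constant(np.array([2, 3], dtype=np.int64))          # apply to the spatial axes only

node = ng.interpolate(image, scales, mode="bilinear_pillow",
                      shape_calculation_mode="scales", axes=axes)
print(node.get_output_partial_shape(0))  # expected to print [1,3,64,64]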
+ :param pads_begin: Specifies the number of pixels to add to the beginning of the image + being interpolated. Default is None. + :param pads_end: Specifies the number of pixels to add to the end of the image being + interpolated. Default is None. + :param coordinate_transformation_mode: + Specifies how to transform the coordinate in the resized tensor to the + coordinate in the original tensor. Default is "half_pixel". + :param nearest_mode: Specifies round mode when mode == nearest and is used only when + mode == nearest. Default is "round_prefer_floor". + :param antialias: Specifies whether to perform anti-aliasing. Default is False. + :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75. + :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation. + """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py index 6aa951a65b66b7..0e3d2cc09cecc2 100644 --- a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py +++ b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py @@ -12,7 +12,7 @@ from ngraph.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp index b7418def4d3acf..49f8bb97953ebf 100644 --- a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp @@ -28,14 +28,14 @@ void regclass_pyngraph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ngraph::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ngraph::DiscreteTypeInfo::version); + discrete_type_info.def_readonly("version_id", &ngraph::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ngraph::DiscreteTypeInfo::parent); discrete_type_info.def("__repr__", [](const ngraph::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/compatibility/pyngraph/node.cpp b/src/bindings/python/src/compatibility/pyngraph/node.cpp index 02bfb3d1395630..f696a4297cad7f 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node.cpp @@ 
-277,16 +277,6 @@ void regclass_pyngraph_Node(py::module m) { get_rt_info : PyRTMap A dictionary of user defined data. )"); - node.def("get_version", - &ngraph::Node::get_version, - R"( - Returns operation's version of the node. - - Returns - ---------- - get_version : int - Operation version. - )"); node.def("set_argument", &ngraph::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ngraph::NodeVector& args) { @@ -301,7 +291,6 @@ void regclass_pyngraph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ngraph::Node::*)()) & ngraph::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ngraph::Node::get_version); node.def_property_readonly("type_info", &ngraph::Node::get_type_info); node.def_property("friendly_name", &ngraph::Node::get_friendly_name, &ngraph::Node::set_friendly_name); diff --git a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp index 281525cca95832..2108a7a057bb3c 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp @@ -82,7 +82,7 @@ class NodeFactory { return it->second(); } - const ngraph::OpSet& m_opset = ngraph::get_opset10(); + const ngraph::OpSet& m_opset = ngraph::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py index 3c2937c214ed70..9241819e87135c 100644 --- a/src/bindings/python/src/openvino/runtime/__init__.py +++ b/src/bindings/python/src/openvino/runtime/__init__.py @@ -56,6 +56,7 @@ from openvino.runtime import opset8 from openvino.runtime import opset9 from openvino.runtime import opset10 +from openvino.runtime import opset11 # Import properties API from openvino._pyopenvino import properties @@ -66,19 +67,19 @@ # Extend Node class to support binary operators -Node.__add__ = opset10.add -Node.__sub__ = opset10.subtract -Node.__mul__ = opset10.multiply -Node.__div__ = opset10.divide -Node.__truediv__ = opset10.divide -Node.__radd__ = lambda left, right: opset10.add(right, left) -Node.__rsub__ = lambda left, right: opset10.subtract(right, left) -Node.__rmul__ = lambda left, right: opset10.multiply(right, left) -Node.__rdiv__ = lambda left, right: opset10.divide(right, left) -Node.__rtruediv__ = lambda left, right: opset10.divide(right, left) -Node.__eq__ = opset10.equal -Node.__ne__ = opset10.not_equal -Node.__lt__ = opset10.less -Node.__le__ = opset10.less_equal -Node.__gt__ = opset10.greater -Node.__ge__ = opset10.greater_equal +Node.__add__ = opset11.add +Node.__sub__ = opset11.subtract +Node.__mul__ = opset11.multiply +Node.__div__ = opset11.divide +Node.__truediv__ = opset11.divide +Node.__radd__ = lambda left, right: opset11.add(right, left) +Node.__rsub__ = lambda left, right: opset11.subtract(right, left) +Node.__rmul__ = lambda left, right: opset11.multiply(right, left) +Node.__rdiv__ = lambda left, right: opset11.divide(right, left) +Node.__rtruediv__ = lambda left, right: opset11.divide(right, left) +Node.__eq__ = opset11.equal +Node.__ne__ = opset11.not_equal +Node.__lt__ = opset11.less +Node.__le__ = opset11.less_equal +Node.__gt__ = opset11.greater +Node.__ge__ = opset11.greater_equal diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index 7bab65a0382113..90099609a1a313 
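A hedged aside (not part of the diff): with the operator rebinding shown above, arithmetic and comparison sugar on nodes now resolves to the opset11 factories. The shapes below are arbitrary example values.

import numpy as np
import openvino.runtime.opset11 as ops

a = ops.parameter([2, 2], dtype=np.float32, name="a")
b = ops.parameter([2, 2], dtype=np.float32, name="b")

node = a + b                   # dispatches to opset11.add via Node.__add__
print(node.get_type_name())    # prints "Add"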
100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -2,7 +2,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from functools import singledispatch from typing import Any, Iterable, Union, Dict, Optional from pathlib import Path @@ -16,6 +15,7 @@ from openvino._pyopenvino import Tensor from openvino.runtime.utils.data_helpers import ( + OVDict, _InferRequestWrapper, _data_dispatch, tensor_from_file, @@ -25,7 +25,7 @@ class InferRequest(_InferRequestWrapper): """InferRequest class represents infer request which can be run in asynchronous or synchronous manners.""" - def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: + def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict: """Infers specified input(s) in synchronous mode. Blocks all methods of InferRequest while request is running. @@ -68,14 +68,14 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: Default value: False :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ - return super().infer(_data_dispatch( + return OVDict(super().infer(_data_dispatch( self, inputs, is_shared=shared_memory, - )) + ))) def start_async( self, @@ -138,6 +138,15 @@ def start_async( userdata, ) + @property + def results(self) -> OVDict: + """Gets all outputs tensors of this InferRequest. + + :return: Dictionary of results from output tensors with ports as keys. + :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + """ + return OVDict(super().results) + class CompiledModel(CompiledModelBase): """CompiledModel class. @@ -161,7 +170,7 @@ def create_infer_request(self) -> InferRequest: """ return InferRequest(super().create_infer_request()) - def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> dict: + def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> OVDict: """Infers specified input(s) in synchronous mode. Blocks all methods of CompiledModel while request is running. @@ -187,8 +196,8 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] :param inputs: Data to be set on input tensors. :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ # It returns wrapped python InferReqeust and then call upon # overloaded functions of InferRequest class @@ -196,7 +205,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] def __call__(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None, - shared_memory: bool = True) -> dict: + shared_memory: bool = True) -> OVDict: """Callable infer wrapper for CompiledModel. Infers specified input(s) in synchronous mode. @@ -248,8 +257,8 @@ def __call__(self, Default value: True :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str as keys. 
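A hedged aside (not part of the diff), sketching the new addressing options on the OVDict returned by InferRequest.infer and CompiledModel.__call__ after this change. The model path, input shape, and output name are placeholders, not values from this patch.

import numpy as np
from openvino.runtime import Core

core = Core()
compiled = core.compile_model("model.xml", "CPU")     # hypothetical model
data = np.zeros((1, 3, 224, 224), dtype=np.float32)   # hypothetical input shape

results = compiled([data])                 # now returns an OVDict
by_port = results[compiled.output(0)]      # ConstOutput key (previous behaviour)
by_index = results[0]                      # new: positional index
by_name = results["output_name"]           # new: tensor name, if the output is named
plain = results.to_dict()                  # back to a plain dict keyed by ConstOutput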
+ :rtype: OVDict """ if self._infer_request is None: self._infer_request = self.create_infer_request() diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py new file mode 100644 index 00000000000000..79c7068bf83d87 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime.opset1.ops import absolute +from openvino.runtime.opset1.ops import absolute as abs +from openvino.runtime.opset1.ops import acos +from openvino.runtime.opset4.ops import acosh +from openvino.runtime.opset8.ops import adaptive_avg_pool +from openvino.runtime.opset8.ops import adaptive_max_pool +from openvino.runtime.opset1.ops import add +from openvino.runtime.opset1.ops import asin +from openvino.runtime.opset4.ops import asinh +from openvino.runtime.opset3.ops import assign +from openvino.runtime.opset1.ops import atan +from openvino.runtime.opset4.ops import atanh +from openvino.runtime.opset1.ops import avg_pool +from openvino.runtime.opset5.ops import batch_norm_inference +from openvino.runtime.opset2.ops import batch_to_space +from openvino.runtime.opset1.ops import binary_convolution +from openvino.runtime.opset3.ops import broadcast +from openvino.runtime.opset3.ops import bucketize +from openvino.runtime.opset1.ops import ceiling +from openvino.runtime.opset1.ops import ceiling as ceil +from openvino.runtime.opset1.ops import clamp +from openvino.runtime.opset1.ops import concat +from openvino.runtime.opset1.ops import constant +from openvino.runtime.opset1.ops import convert +from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset1.ops import convolution +from openvino.runtime.opset1.ops import convolution_backprop_data +from openvino.runtime.opset1.ops import cos +from openvino.runtime.opset1.ops import cosh +from openvino.runtime.opset1.ops import ctc_greedy_decoder +from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.runtime.opset4.ops import ctc_loss +from openvino.runtime.opset3.ops import cum_sum +from openvino.runtime.opset3.ops import cum_sum as cumsum +from openvino.runtime.opset8.ops import deformable_convolution +from openvino.runtime.opset1.ops import deformable_psroi_pooling +from openvino.runtime.opset1.ops import depth_to_space +from openvino.runtime.opset8.ops import detection_output +from openvino.runtime.opset7.ops import dft +from openvino.runtime.opset1.ops import divide +from openvino.runtime.opset7.ops import einsum +from openvino.runtime.opset1.ops import elu +from openvino.runtime.opset3.ops import embedding_bag_offsets_sum +from openvino.runtime.opset3.ops import embedding_bag_packed_sum +from openvino.runtime.opset3.ops import embedding_segments_sum +from openvino.runtime.opset3.ops import extract_image_patches +from openvino.runtime.opset1.ops import equal +from openvino.runtime.opset1.ops import erf +from openvino.runtime.opset1.ops import exp +from openvino.runtime.opset9.ops import eye +from openvino.runtime.opset1.ops import fake_quantize +from openvino.runtime.opset1.ops import floor +from openvino.runtime.opset1.ops import floor_mod +from openvino.runtime.opset8.ops import gather +from openvino.runtime.opset6.ops import gather_elements +from openvino.runtime.opset8.ops import gather_nd +from openvino.runtime.opset1.ops import gather_tree +from openvino.runtime.opset7.ops 
import gelu +from openvino.runtime.opset9.ops import generate_proposals +from openvino.runtime.opset1.ops import greater +from openvino.runtime.opset1.ops import greater_equal +from openvino.runtime.opset9.ops import grid_sample +from openvino.runtime.opset1.ops import grn +from openvino.runtime.opset1.ops import group_convolution +from openvino.runtime.opset1.ops import group_convolution_backprop_data +from openvino.runtime.opset3.ops import gru_cell +from openvino.runtime.opset5.ops import gru_sequence +from openvino.runtime.opset1.ops import hard_sigmoid +from openvino.runtime.opset5.ops import hsigmoid +from openvino.runtime.opset4.ops import hswish +from openvino.runtime.opset7.ops import idft +from openvino.runtime.opset8.ops import if_op +from openvino.runtime.opset11.ops import interpolate +from openvino.runtime.opset9.ops import irdft +from openvino.runtime.opset10.ops import is_finite +from openvino.runtime.opset10.ops import is_inf +from openvino.runtime.opset10.ops import is_nan +from openvino.runtime.opset8.ops import i420_to_bgr +from openvino.runtime.opset8.ops import i420_to_rgb +from openvino.runtime.opset1.ops import less +from openvino.runtime.opset1.ops import less_equal +from openvino.runtime.opset1.ops import log +from openvino.runtime.opset1.ops import logical_and +from openvino.runtime.opset1.ops import logical_not +from openvino.runtime.opset1.ops import logical_or +from openvino.runtime.opset1.ops import logical_xor +from openvino.runtime.opset5.ops import log_softmax +from openvino.runtime.opset5.ops import loop +from openvino.runtime.opset1.ops import lrn +from openvino.runtime.opset4.ops import lstm_cell +from openvino.runtime.opset5.ops import lstm_sequence +from openvino.runtime.opset1.ops import matmul +from openvino.runtime.opset8.ops import matrix_nms +from openvino.runtime.opset8.ops import max_pool +from openvino.runtime.opset1.ops import maximum +from openvino.runtime.opset1.ops import minimum +from openvino.runtime.opset4.ops import mish +from openvino.runtime.opset1.ops import mod +from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset1.ops import multiply +from openvino.runtime.opset6.ops import mvn +from openvino.runtime.opset1.ops import negative +from openvino.runtime.opset9.ops import non_max_suppression +from openvino.runtime.opset3.ops import non_zero +from openvino.runtime.opset1.ops import normalize_l2 +from openvino.runtime.opset1.ops import not_equal +from openvino.runtime.opset8.ops import nv12_to_bgr +from openvino.runtime.opset8.ops import nv12_to_rgb +from openvino.runtime.opset1.ops import one_hot +from openvino.runtime.opset1.ops import pad +from openvino.runtime.opset1.ops import parameter +from openvino.runtime.opset1.ops import power +from openvino.runtime.opset1.ops import prelu +from openvino.runtime.opset8.ops import prior_box +from openvino.runtime.opset1.ops import prior_box_clustered +from openvino.runtime.opset1.ops import psroi_pooling +from openvino.runtime.opset4.ops import proposal +from openvino.runtime.opset1.ops import range +from openvino.runtime.opset8.ops import random_uniform +from openvino.runtime.opset9.ops import rdft +from openvino.runtime.opset3.ops import read_value +from openvino.runtime.opset4.ops import reduce_l1 +from openvino.runtime.opset4.ops import reduce_l2 +from openvino.runtime.opset1.ops import reduce_logical_and +from openvino.runtime.opset1.ops import reduce_logical_or +from openvino.runtime.opset1.ops import reduce_max +from openvino.runtime.opset1.ops import 
reduce_mean +from openvino.runtime.opset1.ops import reduce_min +from openvino.runtime.opset1.ops import reduce_prod +from openvino.runtime.opset1.ops import reduce_sum +from openvino.runtime.opset1.ops import region_yolo +from openvino.runtime.opset2.ops import reorg_yolo +from openvino.runtime.opset1.ops import relu +from openvino.runtime.opset1.ops import reshape +from openvino.runtime.opset1.ops import result +from openvino.runtime.opset1.ops import reverse_sequence +from openvino.runtime.opset3.ops import rnn_cell +from openvino.runtime.opset5.ops import rnn_sequence +from openvino.runtime.opset9.ops import roi_align +from openvino.runtime.opset2.ops import roi_pooling +from openvino.runtime.opset7.ops import roll +from openvino.runtime.opset5.ops import round +from openvino.runtime.opset3.ops import scatter_elements_update +from openvino.runtime.opset3.ops import scatter_update +from openvino.runtime.opset1.ops import select +from openvino.runtime.opset1.ops import selu +from openvino.runtime.opset3.ops import shape_of +from openvino.runtime.opset3.ops import shuffle_channels +from openvino.runtime.opset1.ops import sigmoid +from openvino.runtime.opset1.ops import sign +from openvino.runtime.opset1.ops import sin +from openvino.runtime.opset1.ops import sinh +from openvino.runtime.opset8.ops import slice +from openvino.runtime.opset8.ops import softmax +from openvino.runtime.opset4.ops import softplus +from openvino.runtime.opset9.ops import softsign +from openvino.runtime.opset2.ops import space_to_batch +from openvino.runtime.opset1.ops import space_to_depth +from openvino.runtime.opset1.ops import split +from openvino.runtime.opset1.ops import sqrt +from openvino.runtime.opset1.ops import squared_difference +from openvino.runtime.opset1.ops import squeeze +from openvino.runtime.opset1.ops import strided_slice +from openvino.runtime.opset1.ops import subtract +from openvino.runtime.opset4.ops import swish +from openvino.runtime.opset1.ops import tan +from openvino.runtime.opset1.ops import tanh +from openvino.runtime.opset1.ops import tensor_iterator +from openvino.runtime.opset1.ops import tile +from openvino.runtime.opset3.ops import topk +from openvino.runtime.opset1.ops import transpose +from openvino.runtime.opset10.ops import unique +from openvino.runtime.opset1.ops import unsqueeze +from openvino.runtime.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/runtime/opset11/ops.py new file mode 100644 index 00000000000000..2a54db0069ebd1 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/ops.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Factory functions for all openvino ops.""" +from functools import partial +from typing import List, Optional + +from openvino.runtime import Node +from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.decorators import nameable_op +from openvino.runtime.utils.types import ( + NodeInput, + as_nodes, +) + +_get_node_factory_opset11 = partial(_get_node_factory, "opset11") + +# -------------------------------------------- ops ------------------------------------------------ + + +@nameable_op +def interpolate( + image: NodeInput, + scales_or_sizes: NodeInput, + mode: str, + shape_calculation_mode: str, + pads_begin: Optional[List[int]] = None, + pads_end: Optional[List[int]] = None, + coordinate_transformation_mode: str = 
"half_pixel", + nearest_mode: str = "round_prefer_floor", + antialias: bool = False, + cube_coeff: float = -0.75, + axes: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Perfors the interpolation of the input tensor. + + :param image: The node providing input tensor with data for interpolation. + :param scales_or_sizes: + 1D tensor providing information used to calculate the output shape + of the operation. It might contain floats (scales) or integers(sizes). + :param mode: Specifies type of interpolation. Possible values are: nearest, linear, + linear_onnx, cubic, bilinear_pillow, bicubic_pillow. + :param shape_calculation_mode: + Specifies how the scales_or_sizes input should be interpreted. + :param pads_begin: Specifies the number of pixels to add to the beginning of the image + being interpolated. Default is None. + :param pads_end: Specifies the number of pixels to add to the end of the image being + interpolated. Default is None. + :param coordinate_transformation_mode: + Specifies how to transform the coordinate in the resized tensor to the + coordinate in the original tensor. Default is "half_pixel". + :param nearest_mode: Specifies round mode when mode == nearest and is used only when + mode == nearest. Default is "round_prefer_floor". + :param antialias: Specifies whether to perform anti-aliasing. Default is False. + :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75. + :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation. + """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py index e49265ccca987f..829a77af96a04c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py @@ -5,3 +5,4 @@ from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper +from openvino.runtime.utils.data_helpers.wrappers import OVDict diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py index 24b09d40de9555..e2849b8d5e01bd 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py @@ -4,7 +4,17 @@ import numpy as np -from openvino._pyopenvino import Tensor +# TODO: remove this WA and refactor OVDict when Python3.8 +# becomes minimal supported version. 
+try: + from functools import singledispatchmethod +except ImportError: + from singledispatchmethod import singledispatchmethod # type: ignore[no-redef] + +from collections.abc import Mapping +from typing import Union, Dict, List, Iterator, KeysView, ItemsView, ValuesView + +from openvino._pyopenvino import Tensor, ConstOutput +from openvino._pyopenvino import InferRequest as InferRequestBase @@ -20,3 +30,109 @@ def __init__(self, other: InferRequestBase) -> None: # Private member to store newly created shared memory data self._inputs_data = None super().__init__(other) + + +class OVDict(Mapping): + """Custom OpenVINO dictionary with inference results. + + This class is a dict-like object. It provides the ability to + address data tensors by three key types: + + * `openvino.runtime.ConstOutput` - port of the output + * `int` - index of the output + * `str` - name of the output + + This class follows the `frozenset`/`tuple` concept of immutability. + Assigning new items or editing existing ones is prohibited. + + To revert to the previous behavior, use the `to_dict` method, which + returns a shallow copy of the underlying dictionary. + Note: this removes the addressing feature! The new dictionary keeps + only `ConstOutput` keys. + + If a tuple of return values is needed, use the `to_tuple` method, which + converts the values to a tuple. + + :Example: + + .. code-block:: python + + # Reverts to the previous behavior of the native dict + result = request.infer(inputs).to_dict() + # or alternatively: + result = dict(request.infer(inputs)) + + .. code-block:: python + + # To dispatch outputs of multi-output inference: + out1, out2, out3, _ = request.infer(inputs).values() + # or alternatively: + out1, out2, out3, _ = request.infer(inputs).to_tuple() + """ + def __init__(self, _dict: Dict[ConstOutput, np.ndarray]) -> None: + self._dict = _dict + + def __iter__(self) -> Iterator: + return self._dict.__iter__() + + def __len__(self) -> int: + return len(self._dict) + + def __repr__(self) -> str: + return self._dict.__repr__() + + def __get_key(self, index: int) -> ConstOutput: + return list(self._dict.keys())[index] + + @singledispatchmethod + def __getitem_impl(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + raise TypeError("Unknown key type!") + + @__getitem_impl.register + def _(self, key: ConstOutput) -> np.ndarray: + return self._dict[key] + + @__getitem_impl.register + def _(self, key: int) -> np.ndarray: + try: + return self._dict[self.__get_key(key)] + except IndexError: + raise KeyError(key) + + @__getitem_impl.register + def _(self, key: str) -> np.ndarray: + try: + return self._dict[self.__get_key(self.names().index(key))] + except ValueError: + raise KeyError(key) + + def __getitem__(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + return self.__getitem_impl(key) + + def keys(self) -> KeysView[ConstOutput]: + return self._dict.keys() + + def values(self) -> ValuesView[np.ndarray]: + return self._dict.values() + + def items(self) -> ItemsView[ConstOutput, np.ndarray]: + return self._dict.items() + + def names(self) -> List[str]: + """Return the name of every output key. + + Throws RuntimeError if any of the ConstOutput keys has no name. + """ + return [key.get_any_name() for key in self._dict.keys()] + + def to_dict(self) -> Dict[ConstOutput, np.ndarray]: + """Return the underlying native dictionary. + + This is a shallow copy, so any modifications to + the returned values may affect this class as well. 
+ """ + return self._dict + + def to_tuple(self) -> tuple: + """Convert values of this dictionary to a tuple.""" + return tuple(self._dict.values()) diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py index a89c05ab0cffb6..f952bcf90fb4dc 100644 --- a/src/bindings/python/src/openvino/runtime/utils/node_factory.py +++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py @@ -13,7 +13,7 @@ from openvino.runtime.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 2ad7e395a92895..ef5313cec0185d 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -53,6 +53,27 @@ const std::map& dtype_to_ov_type() { return dtype_to_ov_type_mapping; } +namespace containers { +const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { + TensorIndexMap result_map; + for (auto&& input : inputs) { + int idx; + if (py::isinstance(input.first)) { + idx = input.first.cast(); + } else { + throw py::type_error("incompatible function arguments!"); + } + if (py::isinstance(input.second)) { + auto tensor = Common::cast_to_tensor(input.second); + result_map[idx] = tensor; + } else { + throw ov::Exception("Unable to cast tensor " + std::to_string(idx) + "!"); + } + } + return result_map; +} +}; // namespace containers + namespace array_helpers { bool is_contiguous(const py::array& array) { @@ -110,6 +131,67 @@ py::array as_contiguous(py::array& array, ov::element::Type type) { } } +py::array array_from_tensor(ov::Tensor&& t) { + switch (t.get_element_type()) { + case ov::element::Type_t::f32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::bf16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::boolean: { + return py::array_t(t.get_shape(), t.data()); + break; + } + default: { + throw ov::Exception("Numpy array cannot be created from given OV Tensor!"); + break; + } + } +} + }; // namespace array_helpers template <> @@ -226,38 +308,6 @@ const ov::Tensor& cast_to_tensor(const py::handle& tensor) { return tensor.cast(); } -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs) { - Containers::TensorNameMap result_map; - for (auto&& input : inputs) { - std::string name; - if 
(py::isinstance(input.first)) { - name = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", name, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[name] = tensor; - } - return result_map; -} - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { - Containers::TensorIndexMap result_map; - for (auto&& input : inputs) { - int idx; - if (py::isinstance(input.first)) { - idx = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", idx, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[idx] = tensor; - } - return result_map; -} - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs) { if (!inputs.empty()) { for (auto&& input : inputs) { @@ -293,67 +343,10 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) { } } -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request) { +py::dict outputs_to_dict(InferRequestWrapper& request) { py::dict res; - for (const auto& out : outputs) { - ov::Tensor t{request.get_tensor(out)}; - switch (t.get_element_type()) { - case ov::element::Type_t::i8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::bf16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::boolean: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - default: { - break; - } - } + for (const auto& out : request.m_outputs) { + res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out)); } return res; } diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 910d9e55e966ed..de033c3ddf383c 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -20,14 +20,20 @@ #include "openvino/runtime/infer_request.hpp" #include "openvino/runtime/tensor.hpp" #include "openvino/pass/serialize.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/graph/any.hpp" #include "pyopenvino/graph/ops/constant.hpp" +#include 
"pyopenvino/core/infer_request.hpp" namespace py = pybind11; namespace Common { +namespace containers { + using TensorIndexMap = std::map; + + const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); +}; // namespace containers + namespace values { // Minimum amount of bits for common numpy types. Used to perform checks against OV types. @@ -52,6 +58,8 @@ std::vector get_strides(const py::array& array); py::array as_contiguous(py::array& array, ov::element::Type type); +py::array array_from_tensor(ov::Tensor&& t); + }; // namespace array_helpers template @@ -80,15 +88,11 @@ ov::PartialShape partial_shape_from_list(const py::list& shape); const ov::Tensor& cast_to_tensor(const py::handle& tensor); -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs); - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs); uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual); -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request); +py::dict outputs_to_dict(InferRequestWrapper& request); ov::pass::Serialize::Version convert_to_version(const std::string& version); diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp index 9cd0202f32f415..7cca9af077e15a 100644 --- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp +++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp @@ -9,13 +9,9 @@ #include "common.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/infer_request.hpp" #include "pyopenvino/utils/utils.hpp" -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - namespace py = pybind11; void regclass_CompiledModel(py::module m) { diff --git a/src/bindings/python/src/pyopenvino/core/containers.cpp b/src/bindings/python/src/pyopenvino/core/containers.cpp deleted file mode 100644 index 8ee414e007a14f..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "pyopenvino/core/containers.hpp" - -#include - -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - -namespace py = pybind11; - -namespace Containers { - -void regclass_TensorIndexMap(py::module m) { - py::bind_map(m, "TensorIndexMap"); -} - -void regclass_TensorNameMap(py::module m) { - py::bind_map(m, "TensorNameMap"); -} -} // namespace Containers diff --git a/src/bindings/python/src/pyopenvino/core/containers.hpp b/src/bindings/python/src/pyopenvino/core/containers.hpp deleted file mode 100644 index becf2f717847de..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include - -#include - -namespace py = pybind11; - -namespace Containers { - using TensorIndexMap = std::map; - using TensorNameMap = std::map; - - void regclass_TensorIndexMap(py::module m); - void regclass_TensorNameMap(py::module m); -} diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index ef94b298c9480f..c110dcd5bd7776 100644 --- 
a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -583,6 +583,21 @@ void regclass_Core(py::module m) { :type extensions: list[openvino.runtime.Extension] )"); + cls.def("get_available_devices", + &ov::Core::get_available_devices, + py::call_guard(), + R"( + Returns devices available for inference. Core objects go over all registered plugins. + + GIL is released while running this function. + + :returns: A list of devices. The devices are returned as: CPU, GPU.0, GPU.1, GNA... + If there is more than one device of a specific type, they are enumerated with the .# suffix. + Such an enumerated device can later be used as a device name in all Core methods like: + compile_model, query_model, set_property and so on. + :rtype: list + )"); + cls.def_property_readonly("available_devices", &ov::Core::get_available_devices, py::call_guard(), diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index eb71fd7f953bcf..8be02e8adb86bb 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -11,10 +11,7 @@ #include #include "pyopenvino/core/common.hpp" -#include "pyopenvino/core/containers.hpp" - -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); +#include "pyopenvino/utils/utils.hpp" namespace py = pybind11; @@ -25,7 +22,7 @@ inline py::dict run_sync_infer(InferRequestWrapper& self) { self.m_request.infer(); *self.m_end_time = Time::now(); } - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); } void regclass_InferRequest(py::module m) { @@ -102,7 +99,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_output_tensors", [](InferRequestWrapper& self, const py::dict& outputs) { - auto outputs_map = Common::cast_to_tensor_index_map(outputs); + auto outputs_map = Common::containers::cast_to_tensor_index_map(outputs); for (auto&& output : outputs_map) { self.m_request.set_output_tensor(output.first, output.second); } @@ -119,7 +116,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_input_tensors", [](InferRequestWrapper& self, const py::dict& inputs) { - auto inputs_map = Common::cast_to_tensor_index_map(inputs); + auto inputs_map = Common::containers::cast_to_tensor_index_map(inputs); for (auto&& input : inputs_map) { self.m_request.set_input_tensor(input.first, input.second); } @@ -647,21 +644,29 @@ void regclass_InferRequest(py::module m) { :rtype: List[openvino.runtime.ConstOutput] )"); - cls.def_property_readonly("inputs", - &InferRequestWrapper::get_input_tensors, - R"( - Gets all input tensors of this InferRequest. - - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "inputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("inputs", "2024.0", "Please use 'input_tensors' property instead."); + return self.get_input_tensors(); + }, + R"( + Gets all input tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); - cls.def_property_readonly("outputs", - &InferRequestWrapper::get_output_tensors, - R"( - Gets all output tensors of this InferRequest.
- - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "outputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("outputs", "2024.0", "Please use 'output_tensors' property instead."); + return self.get_output_tensors(); + }, + R"( + Gets all output tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); cls.def_property_readonly("input_tensors", &InferRequestWrapper::get_input_tensors, @@ -710,7 +715,7 @@ void regclass_InferRequest(py::module m) { cls.def_property_readonly( "results", [](InferRequestWrapper& self) { - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); }, R"( Gets all outputs tensors of this InferRequest. diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 7bca2a50980a88..7c45761700fca9 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -6,6 +6,7 @@ #include "pyopenvino/core/common.hpp" #include "pyopenvino/graph/any.hpp" +#include "pyopenvino/utils/utils.hpp" namespace py = pybind11; @@ -158,6 +159,31 @@ void regmodule_properties(py::module m) { wrap_property_RO(m_device, ov::device::capabilities, "capabilities"); wrap_property_RO(m_device, ov::device::uuid, "uuid"); + // Special case: ov::device::properties + m_device.def("properties", []() { + return ov::device::properties.name(); + }); + + m_device.def("properties", [](py::args& args) { + ov::AnyMap value = {}; + for (auto v : args) { + if (!py::isinstance(v)) { + throw py::type_error("Incorrect passed value: " + std::string(py::str(v)) + + ", expected dictionary instead of " + typeid(v).name()); + } + auto dict = py::cast(v); + for (auto item : dict) { + if (!py::isinstance(item.first)) { + throw py::type_error("Incorrect passed key in value: " + std::string(py::str(item.first)) + + ", expected string instead of " + typeid(item.first).name()); + } + value[py::cast(item.first)] = + Common::utils::py_object_to_any(py::cast(item.second)); + } + } + return ov::device::properties(value); + }); + // Modules made in pybind cannot easily register attributes, thus workaround is needed. // Let's simulate module with attributes by creating empty proxy class called FakeModuleName. 
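As an aside, the new device::properties overloads above should make a nested, per-device configuration expressible from Python roughly as follows; the module path follows the current openvino.runtime.properties layout, and core, the device names and the option values are placeholders:

    import openvino.runtime.properties as props

    # The no-argument form returns the property name; the dict-taking form
    # wraps nested per-device options into ov::device::properties.
    core.set_property("AUTO", {props.device.properties(): {"CPU": {"NUM_STREAMS": 4}}})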
class FakeCapability {}; diff --git a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake index 33aaa10a6b3b55..d056bbfc1e93b0 100644 --- a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake +++ b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake @@ -25,7 +25,7 @@ function(frontend_module TARGET FRAMEWORK INSTALL_COMPONENT) target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenVINOPython_SOURCE_DIR}/src/pyopenvino/utils/") - target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::frontend::${FRAMEWORK}) + target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::runtime::dev openvino::frontend::${FRAMEWORK}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/bindings/python/src/pyopenvino/graph/any.cpp b/src/bindings/python/src/pyopenvino/graph/any.cpp index 19c9e3c52cb111..a53fffac102119 100644 --- a/src/bindings/python/src/pyopenvino/graph/any.cpp +++ b/src/bindings/python/src/pyopenvino/graph/any.cpp @@ -5,12 +5,19 @@ #include "openvino/core/any.hpp" #include +#include #include "pyopenvino/graph/any.hpp" #include "pyopenvino/utils/utils.hpp" namespace py = pybind11; +namespace { +bool check_key(py::object key, py::object obj) { + return key.is(py::type::of(obj)); +} +}; // namespace + void regclass_graph_Any(py::module m) { py::class_> ov_any(m, "OVAny"); @@ -23,9 +30,7 @@ void regclass_graph_Any(py::module m) { })); ov_any.def("__repr__", [](const ov::Any& self) { - std::stringstream ret; - self.print(ret); - return ret.str(); + return ""; }); ov_any.def("__hash__", [](ov::Any& self) { @@ -62,6 +67,72 @@ void regclass_graph_Any(py::module m) { ov_any.def("__eq__", [](const ov::Any& a, py::object& b) -> bool { return a == ov::Any(Common::utils::py_object_to_any(b)); }); + ov_any.def( + "astype", + [](ov::Any& self, py::object dtype) { + if (check_key(dtype, py::bool_())) { + return py::cast(self.as()); + } else if (check_key(dtype, py::str())) { + return py::cast(self.as()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as()); + } else if (check_key(dtype, py::dict())) { + return Common::utils::from_ov_any_map_no_leaves(self); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + R"( + Returns runtime attribute casted to defined data type. + + :param dtype: Data type in which runtime attribute will be casted. + :type dtype: Union[bool, int, str, float, dict] + + :return: A runtime attribute. 
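A small sketch of the astype accessor being added here (and aslist just below); the values are arbitrary and mirror the new tests later in this patch:

    from openvino.runtime import OVAny

    assert OVAny(42).astype(int) == 42
    assert OVAny("text").astype(str) == "text"
    assert OVAny([1.5, 2.5]).aslist(float) == [1.5, 2.5]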
+ :rtype: Any + )"); + ov_any.def( + "aslist", + [](ov::Any& self, py::object dtype) { + // before serialization + if (self.is() || dtype.is_none()) { + return py::cast(py::list()); + } else if (self.is>()) { + return py::cast(self.as>()); + } else if (self.is>()) { + return py::cast(self.as>()); + } else if (self.is>()) { + return py::cast(self.as>()); + } else if (self.is>()) { + return py::cast(self.as>()); + } + // after serialization + if (check_key(dtype, py::str())) { + return py::cast(self.as>()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as>()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as>()); + } else if (check_key(dtype, py::bool_())) { + return py::cast(self.as>()); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + py::arg("dtype") = py::none(), + R"( + Returns runtime attribute as a list with specified data type. + + :param dtype: Data type of a list in which runtime attribute will be casted. + :type dtype: Union[bool, int, str, float] + + :return: A runtime attribute as a list. + :rtype: Union[List[float], List[int], List[str], List[bool]] + )"); ov_any.def( "get", [](const ov::Any& self) -> py::object { diff --git a/src/bindings/python/src/pyopenvino/graph/dimension.cpp b/src/bindings/python/src/pyopenvino/graph/dimension.cpp index fc98155c95ae36..b8e6241bad4a07 100644 --- a/src/bindings/python/src/pyopenvino/graph/dimension.cpp +++ b/src/bindings/python/src/pyopenvino/graph/dimension.cpp @@ -91,6 +91,15 @@ void regclass_graph_Dimension(py::module m) { Return this dimension's min_dimension as integer. This dimension must be dynamic and non-negative. + :return: Value of the dimension. + :rtype: int + )"); + dim.def_property_readonly("min_length", + &ov::Dimension::get_min_length, + R"( + Return this dimension's min_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. :rtype: int )"); @@ -103,7 +112,15 @@ void regclass_graph_Dimension(py::module m) { :return: Value of the dimension. :rtype: int )"); + dim.def_property_readonly("max_length", + &ov::Dimension::get_max_length, + R"( + Return this dimension's max_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. 
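For reference, the new Dimension properties mirror the existing getter methods, as also exercised by the new test_dimension.py added in this patch:

    from openvino.runtime import Dimension

    dim = Dimension(2, 5)
    assert dim.min_length == dim.get_min_length() == 2
    assert dim.max_length == dim.get_max_length() == 5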
+ :rtype: int + )"); dim.def("same_scheme", &ov::Dimension::same_scheme, py::arg("dim"), diff --git a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp index 74b81eb62a45cf..cd19fa2a121cbc 100644 --- a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp +++ b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp @@ -9,6 +9,7 @@ #include #include "openvino/core/type.hpp" +#include "pyopenvino/utils/utils.hpp" namespace py = pybind11; @@ -25,20 +26,18 @@ void regclass_graph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ov::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ov::DiscreteTypeInfo::version); discrete_type_info.def_readonly("version_id", &ov::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ov::DiscreteTypeInfo::parent); - discrete_type_info.def("get_version", &ov::DiscreteTypeInfo::get_version); discrete_type_info.def("hash", [](const ov::DiscreteTypeInfo& self) { return self.hash(); }); discrete_type_info.def("__repr__", [](const ov::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index 6c898713f0647e..acaa8792f0538a 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -480,6 +480,14 @@ void regclass_graph_Model(py::module m) { :return: ParameterVector containing model parameters. :rtype: ParameterVector )"); + model.def_property_readonly("parameters", + &ov::Model::get_parameters, + R"( + Return the model parameters. + + :return: ParameterVector containing model parameters. + :rtype: ParameterVector + )"); model.def("get_results", &ov::Model::get_results, R"( @@ -488,6 +496,14 @@ void regclass_graph_Model(py::module m) { :return: ResultVector containing model parameters. :rtype: ResultVector )"); + model.def_property_readonly("results", + &ov::Model::get_results, + R"( + Return a list of model outputs. + + :return: ResultVector containing model parameters. + :rtype: ResultVector + )"); model.def("get_result", &ov::Model::get_result, R"( @@ -496,6 +512,14 @@ void regclass_graph_Model(py::module m) { :return: Node object representing result. :rtype: openvino.runtime.Node )"); + model.def_property_readonly("result", + &ov::Model::get_result, + R"( + Return single result. + + :return: Node object representing result. + :rtype: openvino.runtime.Node + )"); model.def("get_result_index", (int64_t(ov::Model::*)(const ov::Output&) const) & ov::Model::get_result_index, py::arg("value"), @@ -561,6 +585,14 @@ void regclass_graph_Model(py::module m) { :rtype: bool )"); + model.def_property_readonly("dynamic", + &ov::Model::is_dynamic, + R"( + Returns true if any of the op's defined in the model + contains partial shape. 
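Likewise, the Model properties added above are thin attribute-style mirrors of the getter methods; a sketch with model standing for any openvino.runtime.Model:

    params = model.parameters   # same as model.get_parameters()
    outputs = model.results     # same as model.get_results()
    result = model.result       # same as model.get_result(), for single-result models
    is_dynamic = model.dynamic  # same as model.is_dynamic()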
+ + :rtype: bool + )"); model.def("input", (ov::Output(ov::Model::*)()) & ov::Model::input); model.def("input", (ov::Output(ov::Model::*)(size_t)) & ov::Model::input, py::arg("index")); @@ -742,47 +774,41 @@ void regclass_graph_Model(py::module m) { for (size_t i = 0; i < path.size(); i++) { cpp_args[i] = path[i].cast(); } - return Common::utils::from_ov_any(self.get_rt_info(cpp_args)); + return py::cast(self.get_rt_info(cpp_args)); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as a OVAny object. :param path: List of strings which defines a path to runtime info. :type path: List[str] :return: A runtime attribute. - :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "get_rt_info", [](const ov::Model& self, const py::str& path) -> py::object { - return Common::utils::from_ov_any(self.get_rt_info(path.cast())); + return py::cast(self.get_rt_info(path.cast())); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as a OVAny object. :param path: List of strings which defines a path to runtime info. :type path: str :return: A runtime attribute. - :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "has_rt_info", [](const ov::Model& self, const py::list& path) -> bool { - // FIXME: understand why has_rt_info causes Python crash - try { - std::vector cpp_args(path.size()); - for (size_t i = 0; i < path.size(); i++) { - cpp_args[i] = path[i].cast(); - } - self.get_rt_info(cpp_args); - return true; - } catch (ov::Exception&) { - return false; + std::vector cpp_args(path.size()); + for (size_t i = 0; i < path.size(); i++) { + cpp_args[i] = path[i].cast(); } + return self.has_rt_info(cpp_args); }, py::arg("path"), R"( diff --git a/src/bindings/python/src/pyopenvino/graph/node.cpp b/src/bindings/python/src/pyopenvino/graph/node.cpp index 599194ab9b96f0..1e99bff44210e0 100644 --- a/src/bindings/python/src/pyopenvino/graph/node.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node.cpp @@ -304,14 +304,6 @@ void regclass_graph_Node(py::module m) { :return: A dictionary of user defined data. :rtype: openvino.runtime.RTMap )"); - node.def("get_version", - &ov::Node::get_version, - R"( - Returns operation's version of the node. - - :return: Operation version. 
- :rtype: int - )"); node.def("set_argument", &ov::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ov::NodeVector& args) { @@ -326,7 +318,6 @@ void regclass_graph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ov::Node::*)()) & ov::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ov::Node::get_version); node.def_property_readonly("type_info", &ov::Node::get_type_info); node.def_property("friendly_name", &ov::Node::get_friendly_name, &ov::Node::set_friendly_name); diff --git a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp index bdf7c982b3e8fd..9aed62c2e00a17 100644 --- a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp @@ -79,7 +79,7 @@ class NodeFactory { return it->second(); } - const ov::OpSet& m_opset = ov::get_opset10(); + const ov::OpSet& m_opset = ov::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/src/pyopenvino/graph/passes/manager.cpp b/src/bindings/python/src/pyopenvino/graph/passes/manager.cpp index 814a9974f3982a..e8ba80d1524544 100644 --- a/src/bindings/python/src/pyopenvino/graph/passes/manager.cpp +++ b/src/bindings/python/src/pyopenvino/graph/passes/manager.cpp @@ -53,114 +53,4 @@ void regclass_passes_Manager(py::module m) { :param transformation: transformation instance. :type transformation: openvino.runtime.passes.PassBase )"); - - manager.def( - "register_pass", - [](ov::pass::Manager& self, const std::string& pass_name) -> void { - Common::utils::deprecation_warning("register_pass(pass_name)", - "", - "Please use register_pass(ConstantFolding()) instead."); - if (pass_name == "ConstantFolding") { - self.register_pass(); - } - }, - py::arg("pass_name"), - R"( - This method is deprecated. Please use m.register_pass(ConstantFolding()) instead. - - Register pass by name from the list of predefined passes. - - :param pass_name: String to set the type of a pass. - :type pass_name: str - )"); - - manager.def( - "register_pass", - [](ov::pass::Manager& self, - const std::string& pass_name, - const FilePaths& file_paths, - const std::string& version) -> void { - Common::utils::deprecation_warning("register_pass(pass_name, output_files, version)", - "", - "Please use register_pass(Serialize(xml, bin, version)) instead."); - if (pass_name == "Serialize") { - self.register_pass(file_paths.first, - file_paths.second, - Common::utils::convert_to_version(version)); - } - }, - py::arg("pass_name"), - py::arg("output_files"), - py::arg("version") = "UNSPECIFIED", - R"( - This method is deprecated. Please use m.register_pass(Serialize(...)) instead. - - Set the type of register pass for pass manager. - - :param pass_name: String to set the type of a pass. - :type pass_name: str - :param output_files: Tuple which contains paths where .xml and .bin files will be saved. - :type output_files: Tuple[str, str] - :param version: Sets the version of the IR which will be generated. - Supported versions are: - - "UNSPECIFIED" (default) : Use the latest or function version - - "IR_V10" : v10 IR - - "IR_V11" : v11 IR - :type version: str - - Examples - ---------- - 1. Default Version - pass_manager = Manager() - pass_manager.register_pass("Serialize", output_files=("example.xml", "example.bin")) - 2. 
IR version 11 - pass_manager = Manager() - pass_manager.register_pass("Serialize", output_files=("example.xml", "example.bin"), version="IR_V11") - )"); - - manager.def( - "register_pass", - [](ov::pass::Manager& self, - const std::string& pass_name, - const std::string& xml_path, - const std::string& bin_path, - const std::string& version) -> void { - Common::utils::deprecation_warning("register_pass(pass_name, xml_path, bin_path, version", - "", - "Please use register_pass(Serialize(xml, bin, version)) instead."); - if (pass_name == "Serialize") { - self.register_pass(xml_path, bin_path, Common::utils::convert_to_version(version)); - } - }, - py::arg("pass_name"), - py::arg("xml_path"), - py::arg("bin_path"), - py::arg("version") = "UNSPECIFIED", - R"( - This method is deprecated. Please use m.register_pass(Serialize(...)) instead. - - Set the type of register pass for pass manager. - - :param pass_name: String to set the type of a pass. - :type pass_name: str - :param xml_path: Path where *.xml file will be saved. - :type xml_path: str - :param bin_path: Path where *.bin file will be saved. - :type bin_path: str - :param version: Sets the version of the IR which will be generated. - Supported versions are: - - "UNSPECIFIED" (default) : Use the latest or function version - - "IR_V10" : v10 IR - - "IR_V11" : v11 IR - :type version: str - - Examples - ---------- - 1. Default Version - pass_manager = Manager() - pass_manager.register_pass("Serialize", xml_path="example.xml", bin_path="example.bin") - 2. IR version 11 - pass_manager = Manager() - pass_manager.register_pass("Serialize", xml_path="example.xml", bin_path="example.bin", version="IR_V11") - )"); } diff --git a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp index 5985c87f06136f..e666b3972e605c 100644 --- a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp +++ b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp @@ -10,6 +10,7 @@ #include #include "dict_attribute_visitor.hpp" +#include "meta_data.hpp" #include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" #include "openvino/op/add.hpp" @@ -27,11 +28,48 @@ using PyRTMap = ov::RTMap; PYBIND11_MAKE_OPAQUE(PyRTMap); +// Create our custom iterator to return python object not OVAny itself. 
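In Python terms, the iterator introduced below is what lets RTMap traversal yield plain Python objects, with nested maps unwrapped and leaves kept as OVAny; node is a placeholder for any openvino.runtime.Node:

    rt_map = node.rt_info              # openvino.runtime.RTMap
    rt_map["affinity"] = "CPU"
    for key in rt_map.keys():          # keys() yields plain strings
        print(key)
    for key, value in rt_map.items():  # items()/values() go through the custom iterator
        print(key, value)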
+class PyRTMapIterator { +public: + PyRTMapIterator(const PyRTMap& py_rt_map, py::object ref, bool is_value) + : py_rt_map(py_rt_map), + is_value(is_value), + ref(ref), + it(py_rt_map.cbegin()) {} + + py::object next() { + if (it == py_rt_map.end()) { + throw py::stop_iteration(); + } + const auto result = *it; + it++; + if (is_value) { + return Common::utils::from_ov_any_no_leaves(result.second); + } else { + std::pair res = {result.first, + Common::utils::from_ov_any_no_leaves(result.second)}; + return py::cast(res); + } + } + + const PyRTMap& py_rt_map; + bool is_value = false; + py::object ref; // keep a reference + std::map::const_iterator it; +}; + void regclass_graph_PyRTMap(py::module m) { auto py_map = py::class_(m, "RTMap"); py_map.doc() = "openvino.runtime.RTMap makes bindings for std::map, which can later be used as ov::Node::RTMap"; + py::class_(m, "Iterator") + .def("__iter__", + [](PyRTMapIterator& it) -> PyRTMapIterator& { + return it; + }) + .def("__next__", &PyRTMapIterator::next); + py_map.def("__setitem__", [](PyRTMap& m, const std::string& k, const std::string v) { m[k] = v; }); @@ -39,7 +77,7 @@ void regclass_graph_PyRTMap(py::module m) { m[k] = v; }); py_map.def("__getitem__", [](PyRTMap& m, const std::string& k) -> py::object { - return Common::utils::from_ov_any(m[k]); + return Common::utils::from_ov_any_no_leaves(m[k]); }); py_map.def( "__bool__", @@ -50,20 +88,28 @@ void regclass_graph_PyRTMap(py::module m) { py_map.def( "__iter__", - [](PyRTMap& m) { - return py::make_key_iterator(m.begin(), m.end()); + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); py_map.def( - "items", - [](PyRTMap& m) { - return py::make_iterator(m.begin(), m.end()); + "keys", + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); + py_map.def("items", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast(), rt_map, false); + }); + + py_map.def("values", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast(), rt_map, true); + }); + py_map.def("__contains__", [](PyRTMap& m, const std::string& k) -> bool { auto it = m.find(k); if (it == m.end()) diff --git a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp index 0123dd780e2c1c..41524dfd1b6bad 100644 --- a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp +++ b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp @@ -68,11 +68,17 @@ void regclass_graph_Type(py::module m) { type.def("is_static", &ov::element::Type::is_static); type.def("is_dynamic", &ov::element::Type::is_dynamic); type.def("is_real", &ov::element::Type::is_real); + type.def_property_readonly("real", &ov::element::Type::is_real); type.def("is_integral", &ov::element::Type::is_integral); + type.def_property_readonly("integral", &ov::element::Type::is_integral); type.def("is_integral_number", &ov::element::Type::is_integral_number); + type.def_property_readonly("integral_number", &ov::element::Type::is_integral_number); type.def("is_signed", &ov::element::Type::is_signed); + type.def_property_readonly("signed", &ov::element::Type::is_signed); type.def("is_quantized", &ov::element::Type::is_quantized); + type.def_property_readonly("quantized", &ov::element::Type::is_quantized); type.def("get_type_name", 
&ov::element::Type::get_type_name); + type.def_property_readonly("type_name", &ov::element::Type::get_type_name); type.def("compatible", &ov::element::Type::compatible, py::arg("other"), @@ -121,5 +127,7 @@ void regclass_graph_Type(py::module m) { )"); type.def_property_readonly("size", &ov::element::Type::size); + type.def("get_size", &ov::element::Type::size); type.def_property_readonly("bitwidth", &ov::element::Type::bitwidth); + type.def("get_bitwidth", &ov::element::Type::bitwidth); } diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp index a229f9eaa7d72e..0f2cdf38278010 100644 --- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp +++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp @@ -24,7 +24,6 @@ #endif #include "pyopenvino/core/async_infer_queue.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/core.hpp" #include "pyopenvino/core/extension.hpp" #include "pyopenvino/core/infer_request.hpp" @@ -210,9 +209,6 @@ PYBIND11_MODULE(_pyopenvino, m) { regclass_Core(m); regclass_Tensor(m); - // Registering specific types of containers - Containers::regclass_TensorIndexMap(m); - Containers::regclass_TensorNameMap(m); regclass_CompiledModel(m); regclass_InferRequest(m); diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index f5e1b5f9e0552b..12f08410a67271 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -12,6 +12,7 @@ #include #include "Python.h" +#include "meta_data.hpp" #include "openvino/core/except.hpp" #include "openvino/frontend/decoder.hpp" @@ -20,12 +21,57 @@ using Version = ov::pass::Serialize::Version; namespace Common { namespace utils { +// For complex structure if an element isn't map, then just cast it to OVAny +py::object from_ov_any_no_leaves(const ov::Any& any) { + if (any.is>() || any.is()) { + return Common::utils::from_ov_any_map_no_leaves(any); + } else { + return py::cast(any); + } +} + +// Recursively go through dict to unwrap nested dicts and keep leaves as OVAny. 
+py::object from_ov_any_map_no_leaves(const ov::Any& any) { + const auto traverse_map = [](const ov::AnyMap& map) { + const auto unwrap_only_maps = [](const ov::Any& any) { + if (any.is>()) { + const ov::AnyMap& as_map = *any.as>(); + return from_ov_any_map_no_leaves(as_map); + } else if (any.is()) { + return from_ov_any_map_no_leaves(any.as()); + } + return py::cast(any); + }; + + std::map result; + for (const auto& entry : map) { + result[entry.first] = unwrap_only_maps(entry.second); + } + return py::cast(result); + }; + + if (any.is>()) { + const ov::AnyMap& as_map = *any.as>(); + return traverse_map(as_map); + } else if (any.is()) { + return traverse_map(any.as()); + } + OPENVINO_THROW("Only ov::AnyMap or ov::Meta are expected here."); +} + +py::object from_ov_any_map(const ov::AnyMap& map) { + std::map result; + for (const auto& entry : map) { + result[entry.first] = from_ov_any(entry.second); + } + return py::cast(result); +} + py::object from_ov_any(const ov::Any& any) { // Check for py::object if (any.is()) { return any.as(); - } - // Check for std::string + } // Check for std::string else if (any.is()) { return py::cast(any.as().c_str()); } @@ -98,6 +144,13 @@ py::object from_ov_any(const ov::Any& any) { // Check for std::map else if (any.is>()) { return py::cast(any.as>()); + } // Check for ov::AnyMap (std::map) + else if (any.is()) { + return from_ov_any_map(any.as()); + } + // Check for std::map { + else if (any.is>()) { + return py::cast(any.as>()); } // Check for std::vector else if (any.is>()) { @@ -109,6 +162,9 @@ py::object from_ov_any(const ov::Any& any) { PyDict_SetItemString(dict, property_name.c_str(), PyUnicode_FromString(mutability.c_str())); } return py::cast(dict); + } else if (any.is>()) { + const ov::AnyMap& as_map = *any.as>(); + return from_ov_any_map(as_map); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -194,6 +250,33 @@ void deprecation_warning(const std::string& function_name, const std::string& ve PyErr_WarnEx(PyExc_DeprecationWarning, ss.str().data(), 2); } +bool py_object_is_any_map(const py::object& py_obj) { + if (!py::isinstance(py_obj)) { + return false; + } + auto dict = py::cast(py_obj); + return std::all_of(dict.begin(), dict.end(), [&](const std::pair& elem) { + return py::isinstance(elem.first); + }); +} + +ov::AnyMap py_object_to_any_map(const py::object& py_obj) { + OPENVINO_ASSERT(py_object_is_any_map(py_obj), "Unsupported attribute type."); + ov::AnyMap return_value = {}; + for (auto& item : py::cast(py_obj)) { + std::string key = py::cast(item.first); + py::object value = py::cast(item.second); + if (py::isinstance(value)) { + return_value[key] = py::cast(value); + } else if (py_object_is_any_map(value)) { + return_value[key] = Common::utils::py_object_to_any_map(value); + } else { + return_value[key] = Common::utils::py_object_to_any(value); + } + } + return return_value; +} + ov::Any py_object_to_any(const py::object& py_obj) { // Python types if (py::isinstance(py_obj)) { @@ -227,9 +310,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { } } - // In case of empty vector works like with vector of strings if (_list.empty()) - return _list.cast>(); + return ov::Any(EmptyList()); switch (detected_type) { case PY_TYPE::STR: @@ -244,6 +326,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { OPENVINO_ASSERT(false, "Unsupported attribute type."); } // OV types + } else if (py_object_is_any_map(py_obj)) { + return py_object_to_any_map(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); 
} else if (py::isinstance(py_obj)) { diff --git a/src/bindings/python/src/pyopenvino/utils/utils.hpp b/src/bindings/python/src/pyopenvino/utils/utils.hpp index ff6da6558c8694..328f06820033f8 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.hpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.hpp @@ -15,6 +15,14 @@ namespace py = pybind11; namespace Common { namespace utils { + struct EmptyList {}; + + py::object from_ov_any_no_leaves(const ov::Any& any); + + py::object from_ov_any_map_no_leaves(const ov::Any& almost_map); + + py::object from_ov_any_map(const ov::AnyMap& map); + py::object from_ov_any(const ov::Any& any); std::map properties_to_any_map(const std::map& properties); @@ -23,6 +31,10 @@ namespace utils { void deprecation_warning(const std::string& function_name, const std::string& version = std::string(), const std::string& message = std::string()); + bool py_object_is_any_map(const py::object& py_obj); + + ov::AnyMap py_object_to_any_map(const py::object& py_obj); + ov::Any py_object_to_any(const py::object& py_obj); ov::pass::Serialize::Version convert_to_version(const std::string& version); diff --git a/src/bindings/python/tests/__init__.py b/src/bindings/python/tests/__init__.py index 06d8dfb043480f..a426ce8424ec71 100644 --- a/src/bindings/python/tests/__init__.py +++ b/src/bindings/python/tests/__init__.py @@ -117,7 +117,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git a/src/bindings/python/tests/test_graph/test_any.py b/src/bindings/python/tests/test_graph/test_any.py index 8deb5df899b9eb..4a8643a7586189 100644 --- a/src/bindings/python/tests/test_graph/test_any.py +++ b/src/bindings/python/tests/test_graph/test_any.py @@ -33,18 +33,18 @@ def test_any_list(values, data_type): assert ovany.get() == values -@pytest.mark.parametrize(("value_dict", "data_type"), [ - ({"key": "value"}, str), - ({21: 37}, int), - ({21.0: 37.0}, float), +@pytest.mark.parametrize(("value_dict", "value_type", "data_type"), [ + ({"key": "value"}, str, str), + ({21: 37}, int, int), + ({21.0: 37.0}, float, float), ]) -def test_any_dict(value_dict, data_type): +def test_any_dict(value_dict, value_type, data_type): ovany = OVAny(value_dict) key = list(value_dict.keys())[0] assert isinstance(ovany.value, dict) assert ovany[key] == list(value_dict.values())[0] assert len(ovany.value) == 1 - assert type(ovany.value[key]) == data_type + assert type(ovany.value[key]) == value_type assert type(list(value_dict.values())[0]) == data_type assert ovany.get() == value_dict @@ -65,3 +65,26 @@ def __init__(self): value = OVAny(TestClass()) assert isinstance(value.value, TestClass) assert value.value.text == "test" + + +@pytest.mark.parametrize(("value", "dtype"), [ + ("some_value", str), + (31.23456, float), + (True, bool), + (42, int), +]) +def test_astype(value, dtype): + ovany = OVAny(value) + assert ovany.astype(dtype) == value + + +@pytest.mark.parametrize(("value", "dtype"), [ + (["some_value", "another value"], str), + ([31.23456, -31.3453], float), + ([True, False], bool), + ([42, 21], int), + ([], None), +]) +def 
test_aslist(value, dtype): + ovany = OVAny(value) + assert ovany.aslist(dtype) == value diff --git a/src/bindings/python/tests/test_graph/test_basic.py b/src/bindings/python/tests/test_graph/test_basic.py index 67bb1f1afad7c5..b4cc21edb27150 100644 --- a/src/bindings/python/tests/test_graph/test_basic.py +++ b/src/bindings/python/tests/test_graph/test_basic.py @@ -537,13 +537,6 @@ def test_sink_function_ctor(): assert function.get_friendly_name() == "TestModel" -def test_node_version(): - node = ops.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 - - def test_strides_iteration_methods(): data = np.array([1, 2, 3]) strides = Strides(data) diff --git a/src/bindings/python/tests/test_graph/test_core.py b/src/bindings/python/tests/test_graph/test_core.py index bd02af0fe69089..57e0d26252eec4 100644 --- a/src/bindings/python/tests/test_graph/test_core.py +++ b/src/bindings/python/tests/test_graph/test_core.py @@ -369,10 +369,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version + assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index 120f07562d4457..f76ed01641a6d5 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -11,7 +11,8 @@ import openvino.runtime.opset1 as ov_opset1 import openvino.runtime.opset5 as ov_opset5 -import openvino.runtime.opset10 as ov +import openvino.runtime.opset10 as ov_opset10 +import openvino.runtime.opset11 as ov from openvino.runtime import Type np_types = [np.float32, np.int32] @@ -2145,8 +2146,29 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): axes = [2, 3] mode = "cubic" - node = ov.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, mode=mode, + node = ov_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, + axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + + +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ov.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ov.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 diff 
--git a/src/bindings/python/tests/test_onnx/test_backend.py b/src/bindings/python/tests/test_onnx/test_backend.py index c681f376348142..dc30a9bda3806b 100644 --- a/src/bindings/python/tests/test_onnx/test_backend.py +++ b/src/bindings/python/tests/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -278,10 +277,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/bindings/python/tests/test_runtime/test_core.py b/src/bindings/python/tests/test_runtime/test_core.py index 15c6a2ed553eb1..87709aa443316c 100644 --- a/src/bindings/python/tests/test_runtime/test_core.py +++ b/src/bindings/python/tests/test_runtime/test_core.py @@ -176,11 +176,13 @@ def test_get_version(device): def test_available_devices(device): core = Core() - devices = core.available_devices - assert device in devices, ( - f"Current device '{device}' is not listed in " - f"available devices '{', '.join(devices)}'" - ) + devices_attr = core.available_devices + devices_method = core.get_available_devices() + for devices in (devices_attr, devices_method): + assert device in devices, ( + f"Current device '{device}' is not listed in " + f"available devices '{', '.join(devices)}'" + ) def test_get_property(device): diff --git a/src/bindings/python/tests/test_runtime/test_dimension.py b/src/bindings/python/tests/test_runtime/test_dimension.py new file mode 100644 index 00000000000000..697e11555590b0 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_dimension.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime import Dimension + + +def test_dynamic_dimension(): + dim = Dimension() + assert dim.is_dynamic + assert str(dim) == "?" + assert dim.to_string() == "?" 
+ assert str(dim.__repr__) == ">" + + +def test_dynamic_dimension_with_bounds(): + dim = Dimension(2, 5) + assert str(dim) == "2..5" + assert dim.to_string() == "2..5" + assert not dim.is_static + assert dim.is_dynamic + assert dim.get_min_length() == 2 + assert dim.min_length == 2 + assert dim.get_max_length() == 5 + assert dim.max_length == 5 + + +def test_static_dimension(): + dim = Dimension(2) + assert str(dim) == "2" + assert dim.to_string() == "2" + assert dim.is_static + assert not dim.is_dynamic + assert len(dim) == 2 + assert dim.get_length() == 2 + + +def test_dim_same_scheme(): + assert Dimension().same_scheme(Dimension()) is True + assert Dimension(3).same_scheme(Dimension(3)) is True + assert Dimension(3).same_scheme(Dimension(4)) is False + assert Dimension().same_scheme(Dimension(4)) is False + + +def test_dim_compatible(): + assert Dimension().compatible(Dimension()) is True + assert Dimension(3).compatible(Dimension(3)) is True + assert Dimension(3).compatible(Dimension(4)) is False + assert Dimension().compatible(Dimension(4)) is True + + +def test_dim_relax(): + assert Dimension().relaxes(Dimension()) is True + assert Dimension(3).relaxes(Dimension(3)) is True + assert Dimension(3).relaxes(Dimension(4)) is False + assert Dimension().relaxes(Dimension(4)) is True + + +def test_dim_refine(): + assert Dimension().refines(Dimension()) is True + assert Dimension(3).refines(Dimension(3)) is True + assert Dimension(3).refines(Dimension(4)) is False + assert Dimension().refines(Dimension(4)) is False diff --git a/src/bindings/python/tests/test_runtime/test_infer_request.py b/src/bindings/python/tests/test_runtime/test_infer_request.py index 971208b8a75cd5..4755fde6a77ca5 100644 --- a/src/bindings/python/tests/test_runtime/test_infer_request.py +++ b/src/bindings/python/tests/test_runtime/test_infer_request.py @@ -267,7 +267,7 @@ def test_batched_tensors(device): assert np.array_equal(actual[idx], _tmp) -def test_inputs_outputs_property(device): +def test_inputs_outputs_property_and_method(device): num_inputs = 10 input_shape = [1] params = [ops.parameter(input_shape, np.uint8) for _ in range(num_inputs)] @@ -277,10 +277,14 @@ def test_inputs_outputs_property(device): request = compiled_model.create_infer_request() data = [np.atleast_1d(i) for i in range(num_inputs)] results = request.infer(data).values() - for result, output_tensor in zip(results, request.outputs): + for result, output_tensor in zip(results, request.output_tensors): assert np.array_equal(result, output_tensor.data) - for input_data, input_tensor in zip(data, request.inputs): + for input_data, input_tensor in zip(data, request.input_tensors): assert np.array_equal(input_data, input_tensor.data) + for input_tensor in request.input_tensors: + assert list(input_tensor.get_shape()) == input_shape + for output_tensor in request.output_tensors: + assert list(output_tensor.get_shape()) == input_shape @pytest.mark.skip(reason="Sporadically failed. Need further investigation. 
Ticket - 95967") @@ -395,7 +399,7 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.infer([tensor1, array1], shared_memory=shared_flag) - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -419,8 +423,7 @@ def test_async_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.start_async([tensor1, array1], shared_memory=shared_flag) request.wait() - - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -512,6 +515,7 @@ def test_infer_queue_iteration(device): it = iter(infer_queue) infer_request = next(it) assert isinstance(infer_request, InferRequest) + assert infer_request.userdata is None with pytest.raises(StopIteration): next(it) @@ -1092,6 +1096,8 @@ def test_mixed_scalar_infer(device, shared_flag, input_data): ]) def test_mixed_dynamic_infer(device, shared_flag, input_data): core = Core() + if device == "CPU" and "Intel" not in core.get_property(device, "FULL_DEVICE_NAME"): + pytest.skip("This test fails on ARM plugin because it doesn't support dynamic shapes.") param0 = ops.parameter([], np.float32, name="data0") param1 = ops.parameter(["?"], np.float32, name="data1") add = ops.add(param0, param1, name="add") diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index b13e5015275403..e0af9cbda469be 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -5,6 +5,7 @@ import os import numpy as np import pytest +import math import openvino.runtime.opset8 as ops from openvino.runtime import ( @@ -48,8 +49,10 @@ def test_function_add_outputs_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs("relu_t1") assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert "relu_t1" in function.outputs[1].get_tensor().names assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() @@ -64,8 +67,10 @@ def test_function_add_outputs_op_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(("relu1", 0)) assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -78,9 +83,9 @@ def test_function_add_output_port(): relu1.get_output_tensor(0).set_names({"relu_t1"}) relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") - assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(relu1.output(0)) - assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -94,6 +99,7 @@ def 
test_function_add_output_incorrect_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs("relu_t") # Verify that absent output name is present in error message @@ -108,6 +114,7 @@ def test_function_add_output_incorrect_idx(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs(("relu1", 1234)) # Verify that op name and port number are present in error message @@ -123,6 +130,7 @@ def test_function_add_output_incorrect_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs(("relu_1", 0)) # Verify that absent op name is present in error message @@ -139,8 +147,10 @@ def test_add_outputs_several_tensors(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(["relu_t1", "relu_t2"]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -158,8 +168,10 @@ def test_add_outputs_several_ports(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs([("relu1", 0), ("relu2", 0)]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -175,6 +187,7 @@ def test_add_outputs_incorrect_value(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs(0) assert "Incorrect type of a value to add as output." 
in str(e.value) @@ -187,6 +200,7 @@ def test_add_outputs_incorrect_outputs_list(): relu1.get_output_tensor(0).set_names({"relu_t1"}) function = Model(relu1, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs([0, 0]) assert "Incorrect type of a value to add as output at index 0" in str(e.value) @@ -283,6 +297,9 @@ def test_get_batch(): param = model.get_parameters()[0] param.set_layout(Layout("NC")) assert get_batch(model) == 2 + param = model.parameters[0] + param.set_layout(Layout("NC")) + assert get_batch(model) == 2 def test_get_batch_chwn(): @@ -292,41 +309,53 @@ def test_get_batch_chwn(): add = ops.add(param1, param2) add2 = ops.add(add, param3) model = Model(add2, [param1, param2, param3], "TestFunction") - param = model.get_parameters()[0] - param.set_layout(Layout("CHWN")) + param_method = model.get_parameters()[0] + param_attr = model.parameters[0] + param_method.set_layout(Layout("CHWN")) + param_attr.set_layout(Layout("CHWN")) assert get_batch(model) == 4 def test_set_batch_dimension(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, Dimension(1)) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_int(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, 1) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_default_batch_size(): @@ -335,6 +364,7 @@ def test_set_batch_default_batch_size(): model_param1.set_layout(Layout("NC")) set_batch(model) assert model.is_dynamic() + assert model.dynamic def test_reshape_with_ports(): @@ -520,22 +550,6 @@ def check_rt_info(model): # request - 
https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_serialize_complex_rt_info(request, tmp_path): def check_rt_info(model, serialized): - if serialized: - threshold = "13.23" - min_val = "-3.24543" - max_val = "3.23422" - directed = "YES" - empty = "" - ids = "sasd fdfdfsdf" - mean = "22.3 33.11 44" - else: - threshold = 13.23 - min_val = -3.24543 - max_val = 3.234223 - directed = True - empty = [] - ids = ["sasd", "fdfdfsdf"] - mean = [22.3, 33.11, 44.0] assert model.has_rt_info(["config", "type_of_model"]) is True assert model.has_rt_info(["config", "converter_type"]) is True assert model.has_rt_info(["config", "model_parameters", "threshold"]) is True @@ -548,17 +562,29 @@ def check_rt_info(model, serialized): assert model.has_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) is True assert model.has_rt_info(["config", "model_parameters", "mean_values"]) is True - assert model.get_rt_info(["config", "type_of_model"]) == "classification" - assert model.get_rt_info(["config", "converter_type"]) == "classification" - assert model.get_rt_info(["config", "model_parameters", "threshold"]) == threshold - assert model.get_rt_info(["config", "model_parameters", "min"]) == min_val - assert model.get_rt_info(["config", "model_parameters", "max"]) == max_val - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]) == "tree" - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]) == directed - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) == ids - assert model.get_rt_info(["config", "model_parameters", "mean_values"]) == mean + assert model.get_rt_info(["config", "type_of_model"]).astype(str) == "classification" + assert model.get_rt_info(["config", "converter_type"]).astype(str) == "classification" + assert math.isclose(model.get_rt_info(["config", "model_parameters", "threshold"]).astype(float), 13.23, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "min"]).astype(float), -3.24543, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "max"]).astype(float), 3.234223, rel_tol=0.0001) + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]).astype(str) == "tree" + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]).astype(bool) is True + + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]).aslist(str) == ["sasd", "fdfdfsdf"] + assert model.get_rt_info(["config", "model_parameters", "mean_values"]).aslist(float) == [22.3, 33.11, 44.0] + + rt_info = model.get_rt_info() + assert isinstance(rt_info["config"], dict) + + for key, value in rt_info.items(): + if key == "config": + for config_value in value: + assert config_value in ["type_of_model", "converter_type", "model_parameters"] + + for rt_info_val in model.get_rt_info(["config", "model_parameters", "labels", "label_tree"]).astype(dict): + assert rt_info_val in ["float_empty", "nodes", 
"type", "directed"] core = Core() xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) diff --git a/src/bindings/python/tests/test_runtime/test_ovdict.py b/src/bindings/python/tests/test_runtime/test_ovdict.py new file mode 100644 index 00000000000000..e8c76a6d8d3bf7 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_ovdict.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Mapping +import numpy as np +import pytest + +import openvino.runtime.opset10 as ops +from openvino.runtime import Core, ConstOutput, CompiledModel, InferRequest, Model +from openvino.runtime.ie_api import OVDict + + +def _get_ovdict( + device, + input_shape=None, + data_type=np.float32, + input_names=None, + output_names=None, + multi_output=False, + direct_infer=False, + split_num=5, +): + # Create model + # If model is multi-output (multi_output=True), input_shape must match + # requirements of split operation. + # TODO OpenSource: refactor it to be more generic + if input_shape is None: + input_shape = [1, 20] + if input_names is None: + input_names = ["data_0"] + if output_names is None: + output_names = ["output_0"] + if multi_output: + assert isinstance(output_names, (list, tuple)) + assert len(output_names) > 1 + assert len(output_names) == split_num + param = ops.parameter(input_shape, data_type, name=input_names[0]) + model = Model( + ops.split(param, 1, split_num) if multi_output else ops.abs(param), [param], + ) + # Manually name outputs + for i in range(len(output_names)): + model.output(i).tensor.names = {output_names[i]} + # Compile model + core = Core() + compiled_model = core.compile_model(model, device) + # Create test data + input_data = np.random.random(input_shape).astype(data_type) + # Two ways of infering + if direct_infer: + result = compiled_model(input_data) + assert result is not None + return result, compiled_model + + request = compiled_model.create_infer_request() + result = request.infer(input_data) + assert result is not None + return result, request + + +def _check_keys(keys, outs): + outs_iter = iter(outs) + for key in keys: + assert isinstance(key, ConstOutput) + assert key == next(outs_iter) + return True + + +def _check_values(result): + for value in result.values(): + assert isinstance(value, np.ndarray) + return True + + +def _check_items(result, outs, output_names): + i = 0 + for key, value in result.items(): + assert isinstance(key, ConstOutput) + assert isinstance(value, np.ndarray) + # Check values + assert np.equal(result[outs[i]], result[key]).all() + assert np.equal(result[outs[i]], result[i]).all() + assert np.equal(result[outs[i]], result[output_names[i]]).all() + i += 1 + return True + + +def _check_dict(result, obj, output_names=None): + if output_names is None: + output_names = ["output_0"] + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + assert len(outs) == len(result) + assert len(outs) == len(output_names) + # Check for __iter__ + assert _check_keys(result, outs) + # Check for keys function + assert _check_keys(result.keys(), outs) + assert _check_values(result) + assert _check_items(result, outs, output_names) + assert result.names() == output_names + + return True + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_assign(device, is_direct): + result, _ = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + with pytest.raises(TypeError) as e: + 
result["some_name"] = 99 + assert "'OVDict' object does not support item assignment" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_basic(device, is_direct): + result, obj = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_noname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=[], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(RuntimeError) as e0: + _ = result["some_name"] + assert "Attempt to get a name for a Tensor without names" in str(e0.value) + + with pytest.raises(RuntimeError) as e1: + _ = result.names() + assert "Attempt to get a name for a Tensor without names" in str(e1.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_wrongname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=["output_21"], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = result["output_37"] + assert "output_37" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = result[6] + assert "6" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_single_output_dict(device, is_direct, use_function): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + native_dict = result.to_dict() if use_function else dict(result) + + assert issubclass(type(native_dict), dict) + assert not isinstance(native_dict, OVDict) + assert isinstance(native_dict[outs[0]], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = native_dict["output_0"] + assert "output_0" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = native_dict[0] + assert "0" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_multi_output_basic(device, is_direct): + output_names = ["output_0", "output_1", "output_2", "output_3", "output_4"] + result, obj = _get_ovdict( + device, + multi_output=True, + direct_infer=is_direct, + output_names=output_names, + ) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj, output_names) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_multi_output_tuple0(device, is_direct, use_function): + output_names = ["output_0", "output_1"] + result, obj = _get_ovdict( + device, + input_shape=(1, 10), + multi_output=True, + direct_infer=is_direct, + split_num=2, + output_names=output_names, + ) + + out0, out1 = None, None + if 
use_function: + assert isinstance(result.to_tuple(), tuple) + out0, out1 = result.to_tuple() + else: + out0, out1 = result.values() + + assert out0 is not None + assert out1 is not None + assert isinstance(out0, np.ndarray) + assert isinstance(out1, np.ndarray) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert np.equal(result[outs[0]], out0).all() + assert np.equal(result[outs[1]], out1).all() diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 0f74dfb9c9c164..41558de5aa7fc8 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -305,6 +305,30 @@ def test_properties_device_priorities(): assert f"Incorrect passed value: {value} , expected string values." in str(e.value) +def test_properties_device_properties(): + assert properties.device.properties() == "DEVICE_PROPERTIES" + + def make_dict(*arg): + return dict( # noqa: C406 + [*arg]) + + def check(value1, value2): + assert properties.device.properties(value1) == ("DEVICE_PROPERTIES", OVAny(value2)) + + check({"CPU": {properties.streams.num(): 2}}, + {"CPU": {"NUM_STREAMS": 2}}) + check({"CPU": make_dict(properties.streams.num(2))}, + {"CPU": {"NUM_STREAMS": properties.streams.Num(2)}}) + check({"GPU": make_dict(properties.inference_precision(Type.f32))}, + {"GPU": {"INFERENCE_PRECISION_HINT": Type.f32}}) + check({"CPU": make_dict(properties.streams.num(2), properties.inference_precision(Type.f32))}, + {"CPU": {"INFERENCE_PRECISION_HINT": Type.f32, "NUM_STREAMS": properties.streams.Num(2)}}) + check({"CPU": make_dict(properties.streams.num(2), properties.inference_precision(Type.f32)), + "GPU": make_dict(properties.streams.num(1), properties.inference_precision(Type.f16))}, + {"CPU": {"INFERENCE_PRECISION_HINT": Type.f32, "NUM_STREAMS": properties.streams.Num(2)}, + "GPU": {"INFERENCE_PRECISION_HINT": Type.f16, "NUM_STREAMS": properties.streams.Num(1)}}) + + def test_properties_streams(): # Test extra Num class assert properties.streams.Num().to_integer() == -1 diff --git a/src/bindings/python/tests/test_runtime/test_type.py b/src/bindings/python/tests/test_runtime/test_type.py index b31f36edf9f7e2..28ed0708d7f3e0 100644 --- a/src/bindings/python/tests/test_runtime/test_type.py +++ b/src/bindings/python/tests/test_runtime/test_type.py @@ -64,11 +64,18 @@ def test_basic_ovtypes(ovtype, assert ovtype.is_static() is static_flag assert ovtype.is_dynamic() is dynamic_flag assert ovtype.is_real() is real_flag + assert ovtype.real is real_flag assert ovtype.is_integral() is integral_flag + assert ovtype.integral is integral_flag assert ovtype.is_signed() is signed_flag + assert ovtype.signed is signed_flag assert ovtype.is_quantized() is quantized_flag + assert ovtype.quantized is quantized_flag assert ovtype.get_type_name() == type_name + assert ovtype.type_name == type_name + assert ovtype.get_size() == type_size assert ovtype.size == type_size + assert ovtype.get_bitwidth() == type_bitwidth assert ovtype.bitwidth == type_bitwidth @@ -77,15 +84,22 @@ def test_undefined_ovtype(): assert ov_type.is_static() is True assert ov_type.is_dynamic() is False assert ov_type.is_real() is False + assert ov_type.real is False assert ov_type.is_integral() is True + assert ov_type.integral is True assert ov_type.is_signed() is False + assert ov_type.signed is False assert ov_type.is_quantized() is False + assert ov_type.quantized is False assert 
ov_type.get_type_name() == "undefined" + assert ov_type.type_name == "undefined" + assert ov_type.get_size() == 0 assert ov_type.size == 0 # Note: might depend on the system import sys assert ov_type.bitwidth == sys.maxsize * 2 + 1 + assert ov_type.get_bitwidth() == sys.maxsize * 2 + 1 def test_dynamic_ov_type(): @@ -98,7 +112,9 @@ def test_dynamic_ov_type(): assert ov_type.is_quantized() is False assert ov_type.get_type_name() == "dynamic" assert ov_type.size == 0 + assert ov_type.get_size() == 0 assert ov_type.bitwidth == 0 + assert ov_type.get_bitwidth() == 0 @pytest.mark.parametrize(("ovtype_one", "ovtype_two", "expected"), [ diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py index fdc7eec0048ce9..e153fc9412c19a 100644 --- a/src/bindings/python/tests/test_transformations/test_offline_api.py +++ b/src/bindings/python/tests/test_transformations/test_offline_api.py @@ -354,5 +354,6 @@ def test_flush_fp32_subnormals_to_zero(): apply_moc_transformations(model, cf=False, smart_reshape=True) # apply_flush_fp32_subnormals_to_zero is called inside - assert np.all(weights.data[4:8] != subnorm_val) - assert np.all(weights.data[4:8] == 0.0) + new_weights = add_node.input_value(1).get_node() + assert np.all(new_weights.data[4:8] != subnorm_val) + assert np.all(new_weights.data[4:8] == 0.0) diff --git a/src/bindings/python/tests/test_utils/test_data_dispatch.py b/src/bindings/python/tests/test_utils/test_data_dispatch.py index 254cf890458bb8..e2ce00f10e7482 100644 --- a/src/bindings/python/tests/test_utils/test_data_dispatch.py +++ b/src/bindings/python/tests/test_utils/test_data_dispatch.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Intel Corporation +# Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os @@ -157,8 +157,8 @@ def test_ndarray_copied_dispatcher(device, input_shape): result, infer_request = _run_dispatcher(device, test_data, False, input_shape) assert result == {} - assert np.array_equal(infer_request.inputs[0].data, test_data) + assert np.array_equal(infer_request.input_tensors[0].data, test_data) test_data[0] = 2.0 - assert not np.array_equal(infer_request.inputs[0].data, test_data) + assert not np.array_equal(infer_request.input_tensors[0].data, test_data) diff --git a/src/bindings/python/tests_compatibility/__init__.py b/src/bindings/python/tests_compatibility/__init__.py index 7b5d7217cd8ed1..24d2050a3a9d77 100644 --- a/src/bindings/python/tests_compatibility/__init__.py +++ b/src/bindings/python/tests_compatibility/__init__.py @@ -122,7 +122,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py index de83f6a77089bf..5acc1a29fd2d49 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py @@ -414,9 +414,3 @@ def test_sink_function_ctor(): 
assert len(function.get_results()) == 1 assert function.get_friendly_name() == "TestFunction" - -def test_node_version(): - node = ng.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py index 9b50732396e5a5..949946ef842bdc 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py @@ -250,10 +250,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version + assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index 7b084bc77a81d0..09fda90564bd01 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -11,6 +11,7 @@ import ngraph.opset1 as ng_opset1 import ngraph.opset5 as ng_opset5 import ngraph.opset10 as ng_opset10 +import ngraph.opset11 as ng_opset11 from ngraph.utils.types import make_constant_node from ngraph.exceptions import UserInputError from ngraph.impl import Type @@ -2259,13 +2260,33 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): mode = "cubic" node = ng_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, - mode=mode, shape_calculation_mode=shape_calculation_mode) + axes=axes,mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 assert list(node.get_output_shape(0)) == expected_shape +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ng.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ng_opset11.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, + shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + def test_is_finite_opset10(): input_shape = [1, 2, 3, 4] input_node = ng.parameter(input_shape, np.float32, name="InputData") diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py index 89b7afcb47e4af..53ec35731cbc5f 100644 --- 
a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -282,10 +281,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 7870e2963e3c59..0a0b9f9d1896a4 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -131,7 +131,7 @@ ie_cpack_add_component(${OV_CPACK_COMP_CORE_DEV} HIDDEN DEPENDS ${OV_CPACK_COMP_CORE} ${core_dev_components}) -if(BUILD_SHARED_LIBS) +if(ENABLE_PLUGINS_XML) install(FILES $/plugins.xml DESTINATION ${OV_CPACK_PLUGINSDIR} COMPONENT ${OV_CPACK_COMP_CORE}) diff --git a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp index 783dcafb13f678..97f3bc7569984e 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp @@ -39,12 +39,12 @@ class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::Funct class Restriction { public: explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { - precisionsByVersion.emplace(version, precisions); + void add(const std::string version_id, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { + precisionsByVersion.emplace(version_id, precisions); } bool versionIsRequired; - std::unordered_map precisionsByVersion; + std::unordered_map precisionsByVersion; }; OPENVINO_RTTI("MarkupPrecisions", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp index 23558ec81a26d5..098da265cee0b4 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp @@ -37,12 +37,12 @@ class ngraph::pass::low_precision::MarkupQuantizationGranularity : public ngraph class PerTensorQuantization { public: explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const std::vector& restrictions) { - portsByVersion.emplace(version, restrictions); + void add(const std::string version_id, const std::vector& restrictions) { + portsByVersion.emplace(version_id, restrictions); } bool versionIsRequired; - std::unordered_map> portsByVersion; + std::unordered_map> portsByVersion; }; OPENVINO_RTTI("MarkupPerTensorQuantization", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp 
b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp index 4064edabf03006..c7c84e2122960c 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp @@ -23,7 +23,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute { public: - OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute); using PrecisionPreservedAttribute::PrecisionPreservedAttribute; void merge_attributes(std::vector& attributes); bool is_skipped() const; diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp index dcdf552856062f..99d6e814c2abe0 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp @@ -58,7 +58,7 @@ class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue { */ class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute); IntervalsAlignmentAttribute() = default; IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels); IntervalsAlignmentAttribute( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp index 13baf73c3eb1b6..f5d14342a8f230 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute); PrecisionPreservedAttribute() = default; PrecisionPreservedAttribute(const bool value); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp index 41f88a290b6a7c..0a69000b85e4e1 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp @@ -26,7 +26,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedAttribute> { public: - OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute); PrecisionsAttribute(const std::vector& precisions); static ov::Any create( diff --git 
a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp index 0c766b71885ab5..30daa5f8073082 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp @@ -27,7 +27,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute); QuantizationAlignmentAttribute(const bool value = false); static ov::Any create( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp index 84f2bf474c82a2..df466ef041cf4a 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationGranularityAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute); enum class Granularity { PerChannel, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp index a3658061510ee0..3c3a454e4b9519 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp @@ -12,7 +12,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API QuantizationModeAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute); enum class Mode { Asymmetric, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp index 1a11bbc2983ea2..1b323ee9424794 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp @@ -11,7 +11,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API SkipCleanupAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute); static ov::Any create(const std::shared_ptr& node); }; } // namespace ngraph diff --git a/src/common/low_precision_transformations/src/markup_precisions.cpp b/src/common/low_precision_transformations/src/markup_precisions.cpp index 
d5c168d4502131..a1748036012f13 100644 --- a/src/common/low_precision_transformations/src/markup_precisions.cpp +++ b/src/common/low_precision_transformations/src/markup_precisions.cpp @@ -30,10 +30,10 @@ ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions( OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { Restriction r(restriction.specifyVersion); - r.precisionsByVersion.emplace(restriction.operationType.version, restriction.precisionsByPorts); + r.precisionsByVersion.emplace(restriction.operationType.version_id, restriction.precisionsByPorts); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.precisionsByPorts); + it->second.add(restriction.operationType.version_id, restriction.precisionsByPorts); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -108,9 +108,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::run_on_model(const std::shar if (it != restrictionsByOperation.end()) { const Restriction& r = it->second; if (r.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = r.precisionsByVersion.find(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = r.precisionsByVersion.find(typeInfo.version_id); if (it2 == r.precisionsByVersion.end()) { continue; } diff --git a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp index 7b86c1d2203c50..bbe448d83423ec 100644 --- a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp +++ b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp @@ -20,10 +20,10 @@ ngraph::pass::low_precision::MarkupQuantizationGranularity::MarkupQuantizationGr OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { PerTensorQuantization r(restriction.specifyVersion); - r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictions); + r.portsByVersion.emplace(restriction.operationType.version_id, restriction.restrictions); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.restrictions); + it->second.add(restriction.operationType.version_id, restriction.restrictions); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -74,9 +74,7 @@ bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(co } if (restriction.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = restriction.portsByVersion.find(node->get_type_info().version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = restriction.portsByVersion.find(node->get_type_info().version_id); if (it2 == restriction.portsByVersion.end()) { continue; } diff --git a/src/common/offline_transformations/include/mask_attribute.hpp b/src/common/offline_transformations/include/mask_attribute.hpp index 33ec516ded9c94..22ad9ba70d054a 100644 --- a/src/common/offline_transformations/include/mask_attribute.hpp +++ b/src/common/offline_transformations/include/mask_attribute.hpp @@ -28,7 +28,7 @@ namespace ngraph { class Mask : public std::vector>, public std::enable_shared_from_this { public: static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Mask", 0, "0"}; + static const ::ov::DiscreteTypeInfo type_info_static{"Mask", "0"}; return type_info_static; } diff --git 
a/src/common/snippets/README.md b/src/common/snippets/README.md index eca770a584cda2..8f9d55bc11714b 100644 --- a/src/common/snippets/README.md +++ b/src/common/snippets/README.md @@ -2,12 +2,13 @@ ## Key Contacts -Please contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group, for assistance regarding snippets. +For assistance regarding snippets, contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group. * [SnippetS design guide](./docs/snippets_design_guide.md) * [CPU target for SnippetS code generator](./docs/snippets_cpu_target.md) ## See also + * [OpenVINO™ README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer documentation](../../../docs/dev/index.md) \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_cpu_target.md b/src/common/snippets/docs/snippets_cpu_target.md index 04b70f7df8708e..68f03202c4761d 100644 --- a/src/common/snippets/docs/snippets_cpu_target.md +++ b/src/common/snippets/docs/snippets_cpu_target.md @@ -1,12 +1,12 @@ -# CPU target for SnippetS code generator +# CPU Target for SnippetS Code Generator -Snippets in its first generation can be seen as a generalization over generic eltwise node. First generation of snippets has lack of integration with oneDNN and so patterns it supports should be kept orthogonal to what is fused with post-ops. +Snippets in its first generation can be seen as a generalization over a generic eltwise node. First generation of snippets does not have integration with oneDNN, and the patterns it supports should be kept orthogonal to what is fused with post-ops. -POC CPU implementation could be found [here](https://github.com/openvinotoolkit/openvino/pull/2824) +See the example of POC CPU implementation [here](https://github.com/openvinotoolkit/openvino/pull/2824). First 8 kernel parameters are passed by structure which is unpacked inside a kernel into the registers. The rest are passed through the stack. -Loop trip count should be placed to some GP register, as well as work amount. Moreover, we need to load all the parameters into GP registers. If we assume that we have enough registers than it can be done before the loop body. +The loop trip count should be placed to a GP register, as well as the work amount. Moreover, you need to load all the parameters into GP registers. If you assume that you have enough registers, then it can be done before the loop body. ``` auto param0 = abi_params[0]; @@ -18,9 +18,9 @@ auto work_amount = abi_params[3]; ## Memory operations -Load could be Vector, Scalar and Broadcast. Only native vector size for an architecture is supported (e.g. 16 on AVX-512) +A load could be Vector, Scalar, and Broadcast. Only the native vector size for an architecture is supported (for example, 16 on AVX-512). -Memory operation also generates post increments for the pointer it uses. +Memory operation also generates post increments for the pointer it uses. - `MemoryEmitter` - `StoreEmitter` @@ -50,8 +50,8 @@ Tensor data can be passed with strides. `Data` corresponds to a constant table and wraps this entity for the CPU. 
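For illustration, a minimal C++ sketch of what the generated kernel conceptually computes for a single element-wise Add snippet, with the parameter structure and the vector/scalar split written out as plain loops. The `kernel_args` struct, field names, and function name are assumptions made for this sketch only; the real kernel is emitted as JIT code by the emitters listed above, not compiled C++.

```cpp
#include <cstddef>

// Hypothetical mirror of the structure carrying the first kernel parameters;
// in the generated code these fields are unpacked into GP registers.
struct kernel_args {
    const float* src0;
    const float* src1;
    float* dst;
    size_t work_amount;  // number of elements to process
};

// Conceptual equivalent of the emitted loop body for [Load, Load] -> Add -> Store.
void add_snippet_reference(const kernel_args& args) {
    constexpr size_t vlen = 16;  // native vector width, e.g. 16 floats on AVX-512
    size_t i = 0;
    // Vector part: each memory operation advances its pointer by the vector width.
    for (; i + vlen <= args.work_amount; i += vlen) {
        for (size_t j = 0; j < vlen; ++j)
            args.dst[i + j] = args.src0[i + j] + args.src1[i + j];
    }
    // Scalar tail: corresponds to the scalar Load/Store variants.
    for (; i < args.work_amount; ++i)
        args.dst[i] = args.src0[i] + args.src1[i];
}
```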
## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO SnippetS](../README.md) * [OpenVINO Core Components](../../../README.md) * [Developer documentation](../../../../docs/dev/index.md) - \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_design_guide.md b/src/common/snippets/docs/snippets_design_guide.md index 01b005b20e4ec9..d495b35a3fc437 100644 --- a/src/common/snippets/docs/snippets_design_guide.md +++ b/src/common/snippets/docs/snippets_design_guide.md @@ -1,26 +1,26 @@ -# SnippetS design guide -This document describes the design and rationale for snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). Proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). +# SnippetS Design Guide +This document describes the design and rationale for a snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). A proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). ## Rationale -We believe that core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. +Core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. -We believe **potential speedup is proportional to shrink in memory-walked bytes**. So we can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. Number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. +The **potential speedup is proportional to shrink in memory-walked bytes**. Therefore, you can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. The number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. -We design SnippetS generator for back-end developers. The main purpose of inventing snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. 
This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. +The SnippetS generator is designed for back-end developers. The main purpose of inventing the snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. -We believe that creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evelition. We aim to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (e.g. instruction scheduling) to the backend H/W. +Creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evolution. The aim is to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (for example, instruction scheduling) to the backend H/W. -We do not aim to invent a DSL for SnippetS and would like to keep it this way. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. +There are no plans to invent a DSL for SnippetS. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. -**Snippet** is a compiled compute **kernel** generated from a subgraph using SnippetS code generator for specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain which defines snippet schedule. It’s a subject of extension for future generations. +**Snippet** is a compiled compute **kernel** generated from a subgraph using the SnippetS code generator for a specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain that defines snippet schedule. It is a subject of extension for future generations. -We use nGraph as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. We aim to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). +nGraph is used as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. 
The aim is to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). -**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. Snippet somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. +**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. A snippet is somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. **Subgraph** once extracted from full topology IR is **treated as an operation and data flow descriptor in scalar notation** (similar to OpenCL/CUDA). Tensor sizes are used for defining scheduling domain and detecting broadcasts/reductions. -We split operations into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, elements-wise math and ReLU does in this category. Implementation **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repaks. For **layout-specific** operation semantics and implementation depends on the layout. For example, the Yolo region. Patterns to fuse constructed in terms of taxonomy above. +Operations are split into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent(-specific)**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, like elements-wise math and ReLU in this category. Implementation of **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repacks. **Layout-specific** operation semantics and implementation depend on the layout. For example, the Yolo region. Patterns to fuse are constructed in terms of taxonomy above. ## Design @@ -28,19 +28,19 @@ Code generation is split into 2 phases, **tokenization** and **lowering**. ### Tokenization -Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule) +Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule). -Procedure of finding subgraphs suitable for code generation is called **tokenization**, meaning that we split the topology tree into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. 
It also could be seen as and modified into a basic block construction problem, since we also find a leader and potentially terminators. Implementation can be found [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). +A procedure of finding subgraphs suitable for code generation is called **tokenization**. During tokenization the topology tree is split into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. It may also be seen as and modified into a basic block construction problem, since there is a leader and potentially terminators. See the example of implementation [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). -Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization on the other hand has the only limitation on specific operation types which are **suitable and profitable** to fuse with respect to original topology correctness (keeping it as a direct acyclic graph). +Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization, on the other hand, has the only limitation on specific operation types which are **suitable and profitable** to fuse, respecting original topology correctness (keeping it as a direct acyclic graph). -The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (e.g. layout assignment, memory allocation, etc.). +The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (for example, layout assignment, memory allocation, etc.). ### Supported subgraph patterns -Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. +Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. -Pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example Shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered as broadcast-compatible. Layout-oblivious operation with multiple outputs as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but minimal number of inputs/and outputs. For example, GeLU has a 5x shrinkage factor from original unfused subgraph in number of bytes walked. Subgraph below could be considered as an example of such a subgraph. 
Leader detection procedure aims to find such subgraphs. +A pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example, shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered broadcast-compatible. A layout-oblivious operation with multiple outputs serves as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but a minimal number of inputs and outputs. For example, GeLU has a 5x shrinkage factor from the original unfused subgraph in the number of bytes walked. The subgraph below could be considered as an example of such a subgraph. The leader detection procedure aims to find such subgraphs. ```mermaid flowchart LR @@ -60,12 +60,12 @@ class nodeA3 steel1 ``` Operations are greedily added to the subgraph until -1. New operation doesn’t introduce a loop in a topology function. +1. A new operation does not introduce a loop in the topology function. 1. Number of inputs and outputs satisfies target criteria. 1. Operation is not a predecessor of topology output. -1. Resulting subgraph can be scheduled (all outputs are broadcast-compatible). +1. The resulting subgraph can be scheduled (all outputs are broadcast-compatible). -If a potential subgraph doesn’t meet any of criteria above, the procedure continues to find a new leader. +If a potential subgraph does not meet any of the criteria above, the procedure continues to find a new leader. ### Lowering @@ -82,27 +82,27 @@ Lowering is a sequence of subgraph (snippet body) traversal passes to generate a #### Common optimizations -Constants are treated as inputs for a subgraph with an exception for scalar cases (since we don’t need to schedule them). `snippets::op::Scalar` is used to represent this kind of constants. +Constants are treated as inputs for a subgraph with an exception for scalar cases (since they do not need to be scheduled). `snippets::op::Scalar` is used to represent this kind of constant. -If such Scalar comes as a second input of Power operation, it’s replaced with `snippets::op::PowerStatic`. +If such a Scalar comes as the second input of a Power operation, it is replaced with `snippets::op::PowerStatic`. #### Canonicalization -The goal of this step is to apply target independent and schedule related optimizations and to make snippet **schedulable**. +The goal of this step is to apply target-independent and schedule-related optimizations and to make a snippet **schedulable**. ##### Domain normalization All input and output shapes are normalized to 6D for future schedule generation. If shape propagation fails or leads to inconsistent output shapes an exception is raised. -Layout assigned by user code and passed to a `generate` function is propagated through subgraph on this step as well. Layout is passed to a generate function as a `BlockedShapeVector` which is a `std::vector` , while `BlockedShape` is `std::tuple`. For example, if backend supports `NCHW16c` layout and tensor has size of `<1, 42, 17, 31>` and hold single precision floating point this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. +The layout assigned by user code and passed to a `generate` function is propagated through a subgraph at this step as well.
The layout is passed to a `generate` function as a `BlockedShapeVector`, which is a `std::vector<BlockedShape>`, while `BlockedShape` is a `std::tuple<ngraph::Shape, ngraph::AxisVector, ngraph::element::Type>`. For example, if a backend supports `NCHW16c` layout and a tensor has a size of `<1, 42, 17, 31>` and holds single precision floating point, this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. ##### Dialect conversion -The goal for this step is to transform a subgraph (body function) into a form possible to code generation. Input for this step is subgraph in a canonical form output is a subgraph in snippets dialect. +The goal for this step is to transform a subgraph (body function) into a form suitable for code generation. Input for this step is a subgraph in a canonical form. Output is a subgraph in the snippets dialect. -Snippet or kernel is formed around the subgraph body in a sequence of traversal steps. Let’s walk through these steps with the smallest possible subgraph which contains out of single `[Add]` operation. +A snippet or a kernel is formed around the subgraph body in a sequence of traversal steps. Let us walk through these steps with the smallest possible subgraph which contains a single `[Add]` operation. -While we extract subgraphs with the tokenization part we explicitly insert Parameters and Results to its body to form a complete nGraph Function. +When a subgraph is extracted during tokenization, Parameters and Results are explicitly inserted into its body to form a complete nGraph Function. ```mermaid flowchart LR @@ -118,11 +118,11 @@ class nodeA8 steel1 class nodeA1,nodeA3 steel1 ``` -This function represents operation dependencies in scalar (similar to OpenCL) notation while shapes of tensors are used to generate schedules. At this point kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM) +This function represents operation dependencies in scalar (similar to OpenCL) notation while shapes of tensors are used to generate schedules. At this point, kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM). ###### Explicit memory operations -As a next step explicit memory operations are placed for each input and output. `InsertLoad` and `InsertStore` passes derived from `MatcherPass`. +As a next step, explicit memory operations are placed for each input and output. The `InsertLoad` and `InsertStore` passes derive from `MatcherPass`. ```mermaid flowchart LR @@ -142,16 +142,16 @@ class nodeA8 carbon1 class nodeA1,nodeA3,nodeA6,nodeA7 steel1 ``` -By default, memory operations assumes vector memory access, if scalar access is needed special passes `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` should be executed. +By default, memory operations assume vector memory access. If scalar access is needed, the special `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` passes should be executed. ###### Explicit broadcast -For each operation in body function inputs are checked against broadcasting. In case of parameters to be broadcasted explicit broadcast operation is generated. For example, if for the subgraph above we have `<1, 42, 17, 31>` and `<1, 42, 17, 1>` resulting subgraph is going to be +For each operation in the body function, inputs are checked for broadcasting. When Parameters need to be broadcast, an explicit broadcast operation is generated.
For example, with `<1, 42, 17, 31>` and `<1, 42, 17, 1>` for the subgraph above, the resulting subgraph will be: ```mermaid flowchart LR - nodeA1("Parameter\n<1, 42, 17, 1>") --> node6("Load\n<1, 42, 17, 1>") - node6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>") + nodeA1("Parameter\n<1, 42, 17, 1>") --> nodeA6("Load\n<1, 42, 17, 1>") + nodeA6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>") nodeA9("BroadcastMove\n<1, 42, 17, 31>") --> nodeA2(Add) nodeA3("Parameter\n<1, 42, 17, 31>") --> nodeA7("Load\n<1, 42, 17, 31>") nodeA7("Load\n<1, 42, 17, 31>") ---> nodeA2(Add) @@ -164,10 +164,10 @@ classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626 class nodeA2 daisy1 class nodeA5 moss1 class nodeA8,nodeA9 carbon1 -class nodeA1,nodeA3,node6,nodeA7 steel1 +class nodeA1,nodeA3,nodeA6,nodeA7 steel1 ``` -If load followed by broadcast is detected then this pair is replaced by a single Broadcast load instruction. Like the following +If a Load followed by a Broadcast is detected, this pair is replaced by a single BroadcastLoad instruction: ```mermaid flowchart LR @@ -187,7 +187,7 @@ class nodeA8 carbon1 class nodeA1,nodeA3,nodeA6,nodeA7 steel1 ``` -Broadcast and regular streaming vector load is possible from the same pointer. Broadcast load should always go before streaming load. Broadcast load for non the most varying dimension is not generated, however it affects the generated schedule. +Broadcast and regular streaming vector loads are possible from the same pointer. BroadcastLoad should always go before the streaming load. BroadcastLoad for a dimension other than the most varying one is not generated; however, it affects the generated schedule. #### Target-specific optimizations @@ -197,13 +197,13 @@ Target developers can plug in to the code generation pipeline some specific opti #### Register allocation -Canonicalized subgraph in a snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. Linear scan register allocation algorithm is used. Register allocator is implemented as a function pass `ngraph::snippets::pass::AssignRegisters` and store allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for Node's output. *However, this part should be refactored batter, either to become target independent or use target specific abstraction to acquire a new register* +A canonicalized subgraph in the snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. A linear scan register allocation algorithm is used. The register allocator is implemented as the `ngraph::snippets::pass::AssignRegisters` function pass and stores allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for the Node's output. *However, this part should be refactored better, either to become target independent or to use a target-specific abstraction to acquire a new register.* -#### Schedule generation +#### Schedule generation -The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. `Kernel` and `Tile` operations are introduced for this purpose.
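Before the constructor details below, a hedged C-style sketch of what the generated schedule for the `[Add]` example conceptually corresponds to (an illustration with assumed names and an assumed collapse of the outer dimensions, not code produced by the generator): an outer loop over the scheduling domain, a vector `Tile` over the most varying dimension, and a scalar tail `Tile`.

```cpp
// Illustrative only: the function name, vlen and the flattening of outer dimensions are assumptions.
void add_kernel(const float* a, const float* b, float* out,
                size_t outer_count, size_t inner_dim, size_t vlen) {
    for (size_t o = 0; o < outer_count; ++o) {        // schedule over the collapsed outer dimensions
        const float* pa = a + o * inner_dim;
        const float* pb = b + o * inner_dim;
        float* po = out + o * inner_dim;
        size_t i = 0;
        for (; i + vlen <= inner_dim; i += vlen)       // vector Tile: Load -> Add -> Store
            for (size_t l = 0; l < vlen; ++l)          // stands in for one SIMD instruction
                po[i + l] = pa[i + l] + pb[i + l];
        for (; i < inner_dim; ++i)                     // scalar tail Tile: ScalarLoad -> Add -> ScalarStore
            po[i] = pa[i] + pb[i];
    }
}
```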
Each of this operation has a constructor from code region described as a collection of operation and operands pairs `Kernel(const std::vector, ngraph::snippets::RegInfo>>& region);`. +The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. The `Kernel` and `Tile` operations are introduced for this purpose. Each of these operations has a constructor from a code region described as a collection of operation and operand pairs `Kernel(const std::vector, ngraph::snippets::RegInfo>>& region);`. -If we return to example above this comes to a following hierarchical IR. If we limit scope to layout oblivious operations with broadcasting support, tile could be generated as a single loop over the most warning dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. Special pass replaces memory operations on vector to scalar versions for tail subgraph. +For the example above, this results in the following hierarchical IR. If the scope is limited to layout-oblivious operations with broadcasting support, a `Tile` could be generated as a single loop over the most varying dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. A special pass replaces vector memory operations with scalar versions for the tail subgraph. ```mermaid graph LR @@ -244,13 +244,13 @@ class nodeD1 no-stroke ``` Where -* `Kernel` constants a collection of the tiles, corresponds to a Subgraph node and responsible for function signature generation, calls generators for all tiles and data sections -* `Tile` contains single subgraph body, vector or scalar -* `Data` corresponds to data section aggregated for all nodes in all Tile’s subgraphs +* `Kernel` is a collection of the tiles, corresponds to a Subgraph node and is responsible for function signature generation. It calls generators for all tiles and data sections. +* `Tile` contains a single subgraph body, either a vector or a scalar one. +* `Data` corresponds to the data section aggregated for all nodes in all Tile’s subgraphs. #### Target code emission -Target code emission is table based. Target is responsible for filling `jitters` table field in `Generator` class. +Target code emission is table-based. A target is responsible for filling the `jitters` table field in the `Generator` class. ``` std::map(std::shared_ptr)>> jitters; ``` @@ -260,9 +260,9 @@ std::map( An OpenVINO plugin is treated as a target for snippets. -Each nGraph node is mapped to a convertor function which creates `Emitter` form this node. Each specific emitter should extend from `Emitter`. It is used to map this node to target code and has `emit_code` and `emit_data` methods. `emit_data` is used during data section generation. All operations from snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op as well as Emitter derived snippets::Emitter class which knows how to translate this Op to Target specific ISA. (ex. xbyak is a jit backend for CPU plugin). +Each nGraph node is mapped to a converter function which creates an `Emitter` from the node. Each specific emitter should extend from `Emitter`. It is used to map the node to the target code and has `emit_code` and `emit_data` methods. `emit_data` is used during data section generation.
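As a hedged illustration of how a target fills this table (the operation choice and `MyEmitter` are hypothetical, not part of this patch), each entry maps an nGraph type info to the functors that handle a node; with this patch an entry becomes a `jitters_value` pair whose second functor reports the input precisions supported by the target, as queried by the new `PropagatePrecision` pass:

```cpp
// Hedged sketch mirroring the DummyTargetMachine from the tests in this patch.
// MyEmitter (a snippets::Emitter subclass) and the f32-only precision set are assumptions.
#include <ngraph/opsets/opset1.hpp>
#include "snippets/generator.hpp"

template <typename JittersMap>  // stands in for the protected `jitters` member of a TargetMachine subclass
void register_add_jitter(JittersMap& jitters) {
    jitters[ngraph::opset1::Add::get_type_info_static()] = ngraph::snippets::jitters_value{
        // 1) factory: turn the nGraph node into a target-specific emitter
        [](const std::shared_ptr<ngraph::Node>& n) { return std::make_shared<MyEmitter>(n); },
        // 2) supported input precisions for this operation on this target
        [](const std::shared_ptr<ngraph::Node>& n) {
            return std::set<std::vector<ngraph::element::Type>>{{ngraph::element::f32, ngraph::element::f32}};
        }};
}
```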
All operations from the snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op, together with an emitter derived from `snippets::Emitter` which knows how to translate this Op to the target-specific ISA (for example, xbyak is a JIT backend for the CPU plugin). -For minimal code generator support target should provide emitters for the following operations +For minimal code generator support, a target should provide emitters for the following operations: * `Kernel` * `Tile` * `Data` * `Load` * `ScalarLoad` * `BroadcastLoad` * `Store` * `ScalarStore` -Once a schedule is generated, target code is emitted from a kernel in Generator::generate method by executing Kernel::emit_code function. Since Kernel and Tile represents hierarchical +Once a schedule is generated, target code is emitted from a kernel in the `Generator::generate` method by executing the `Kernel::emit_code` function. Code emission is hierarchical as well, since `Kernel` and `Tile` form a hierarchical IR. ##### Dialect extensibility -Target can potentially extend snippets dialect with target specific operation for code emission. It should implement: +A target can potentially extend the snippets dialect with a target-specific operation for code emission. It should implement: -* nGraph operation (ex. `class FMA : public ngraph::op::Op`) -* Emitter for this operation (ex. `class FmaEmitter : public Emitter` ) -* register this pair in `jitters` map +* an nGraph operation (for example, `class FMA : public ngraph::op::Op`) +* an Emitter for the operation (for example, `class FmaEmitter : public Emitter`) +* registration of the pair in the `jitters` map ### Calling convention -Parameters for a generated snippet are split into schedule-invariant and schedule-dependent. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as scheduling domain. +Parameters for a generated snippet are split into schedule-invariant and schedule-dependent ones. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as the scheduling domain. ### Diagnostics #### Reference mode -Subgraph can be executed with nGraph references if no generator is present. +A subgraph can be executed with nGraph references if no generator is present. ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO SnippetS](../README.md) * [OpenVINO Core Components](../../../README.md) * [Developer documentation](../../../../docs/dev/index.md) - diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index b02f995c6e5c19..48715235c11f42 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -16,6 +16,8 @@ namespace snippets { auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; +typedef std::pair(const std::shared_ptr&)>, + std::function>(const std::shared_ptr&)>> jitters_value; /** * @interface TargetMachine * @brief Base class Target machine representation.
Target derives from this class to provide generator information about supported emitters @@ -50,7 +52,16 @@ class TargetMachine { if (jitter == jitters.end()) { throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); } - return jitter->second; + return jitter->second.first; + } + + std::function>(const std::shared_ptr&)> + get_supported_precisions(const ngraph::DiscreteTypeInfo type) const { + auto jitter = jitters.find(type); + if (jitter == jitters.end()) { + throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); + } + return jitter->second.second; } /** @@ -63,7 +74,7 @@ class TargetMachine { virtual ~TargetMachine() = default; protected: - std::map(std::shared_ptr)>> jitters; + std::map jitters; }; /** diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index ec55f076301c64..46e6633f61b8aa 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -101,11 +101,17 @@ class Subgraph : public ov::op::util::SubGraphOp { bool is_quantized() const { return config.m_is_quantized; } bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt, + snippets::Schedule generate(const BlockedShapeVector& output_shapes, + const BlockedShapeVector& input_shapes, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params = nullptr); snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr); + snippets::Schedule generate(ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params = nullptr); snippets::Schedule generate(const void* compile_params = nullptr); ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); std::vector reshape_body(const std::vector& input_shapes); @@ -132,6 +138,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // This check returns True if Constant op which is input of this op should be inside Subgraph body static auto constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool; + static bool check_broadcast(const std::shared_ptr& node) noexcept; + private: void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); void convert_to_snippet_dialect(); @@ -164,8 +172,6 @@ class Subgraph : public ov::op::util::SubGraphOp { public: // True if Subgraph contains FakeQuantize -> FQ decomposition should be called bool m_is_quantized = false; - // True if we should align element types indise body - bool m_is_needed_to_align_precision = false; // True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes // because TypeRelaxed::copy_with_new_inputs() isn't save-thread method bool m_has_type_relaxed_ops = false; diff --git a/src/common/snippets/include/snippets/pass/align_element_type.hpp 
b/src/common/snippets/include/snippets/pass/align_element_type.hpp deleted file mode 100644 index 0b1f831091c4cc..00000000000000 --- a/src/common/snippets/include/snippets/pass/align_element_type.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -namespace ngraph { -namespace snippets { -namespace pass { - -/** - * @interface AlignElementType - * @brief Wrap sequence of operations which doesn't support execution on original element type by ConvertSaturation - * and reset element type for type relaxed nodes inside body to align element type between nodes. - * Example 1: - * - After FQ decomposition there may be Convert[U8/I8]. If after the Convert there are other operations - * that don't support U8/I8, new ConvertSaturation[exec_type] will be inserted after the FQ decomposition - * to execute these operations on supported element type - * Example 2: - * - Input[I8] -> Unsupported I8 op -> Movement op -> Output[I8]. There will be inserted two ConvertSaturation: - * * ConvertSatiration[exec_type] before op which is unsupported I8 - * * ConvertSaturation[I8] before Movement op to return original low precision. - * Note: We cannot just remove original Convert[I8/U8] in Example 1 because we should cover two things: - * * allow execution of operations on supported element type for them - * * keep computations mathematically equivalent to the original function - * Thus, for these cases we should have the following pipeline: FP32 -> Convert[I8/U8] -> Convert[FP32] -> FP32 - * Note: We shouldn't call validate_and_infer_type() after Convert insertions to avoid element type conflicts on inputs of ops - * @ingroup snippets - */ -class AlignElementType: public ngraph::pass::FunctionPass { -public: - OPENVINO_RTTI("AlignElementType", "0"); - AlignElementType(const ov::element::Type exec_type = ov::element::f32); - bool run_on_model(const std::shared_ptr& m) override; - - static bool opNeedsAlignElementType(const std::shared_ptr& n, const ov::element::Type exec_type = ov::element::f32); -private: - ov::element::Type exec_type; -}; - -} // namespace pass -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp index 284640d8c18122..cfb9ff41955867 100644 --- a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp +++ b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp @@ -29,7 +29,7 @@ namespace pass { * * Expand brackets: * round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol - * + * * Marking: * - isc := (levels-1) / (ih - il) * - ish := -il * isc @@ -37,7 +37,7 @@ namespace pass { * - osh := ol * Final expression: * round(x * isc + ish) * osc + osh - * + * * Some optimizations (example for scalars): * 1. If output element type of FQ is U8 and il = 0, ish = 0, osc = 1, osh = 0, there is enough expression: x * isc * 2. 
If output element type of FQ is I8 and ish ~= 128, osc = 1, osh ~= -128, il * isc ~= -128, ih * isc ~= 127 there is enough expression: x * isc @@ -54,7 +54,6 @@ class FakeQuantizeDecomposition : public ngraph::pass::MatcherPass { public: FakeQuantizeDecomposition(); - static bool isAllScalarConstant(const std::shared_ptr& node); static bool getScalesAndShifts(const std::shared_ptr& fq_node, std::vector& cl, std::vector& ch, diff --git a/src/common/snippets/include/snippets/pass/propagate_precision.hpp b/src/common/snippets/include/snippets/pass/propagate_precision.hpp new file mode 100644 index 00000000000000..d0920766f632fd --- /dev/null +++ b/src/common/snippets/include/snippets/pass/propagate_precision.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "snippets/generator.hpp" + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @class PropagatePrecision + * @ingroup snippets + * @brief PropagatePrecision transformation propagate precision from parameters to results. + */ +class PropagatePrecision: public ngraph::pass::FunctionPass { +public: + OPENVINO_RTTI("PropagatePrecision", "0"); + PropagatePrecision(const std::shared_ptr& target_machine); + bool run_on_model(const std::shared_ptr& m) override; + + static std::vector get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions) noexcept; + + // if can_be_removed returns true then actual convertion (actual_before => actual_after) + // can be replaced to required (actual_before => required_after) + static bool can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept; + + // if can_be_fused returns true then actual convertion can be replaced to required + static bool can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept; + +private: + const std::shared_ptr target_machine; +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 93e3b47881524a..f8953745520aff 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -11,6 +11,7 @@ #include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/assign_registers.hpp" #include "snippets/pass/convert_constants.hpp" #include "snippets/pass/convert_power_to_powerstatic.hpp" @@ -18,7 +19,6 @@ #include "snippets/pass/insert_loops.hpp" #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/pass/transform_convert.hpp" -#include "snippets/pass/align_element_type.hpp" #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/softmax_decomposition.hpp" @@ -62,10 +62,6 @@ void snippets::op::Subgraph::init_config() { ov::is_type(op); config.m_has_type_relaxed_ops = config.m_has_type_relaxed_ops || std::dynamic_pointer_cast(op); - config.m_is_needed_to_align_precision = config.m_is_needed_to_align_precision || - is_quantized() || - has_type_relaxed_ops() || - snippets::pass::AlignElementType::opNeedsAlignElementType(op, execution_element_type); config.m_has_domain_sensitive_ops = config.m_has_domain_sensitive_ops || 
ov::is_type(op) || ov::is_type(op) || @@ -359,6 +355,14 @@ ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& return master_shape; } +bool snippets::op::Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { + const auto elementwise = std::dynamic_pointer_cast(node); + return + (elementwise == nullptr) || + (elementwise->get_input_partial_shape(0).size() == elementwise->get_input_partial_shape(1).size()) || + (elementwise->get_autob().m_type != ov::op::AutoBroadcastType::PDPD); +} + void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes) { // We should insert Convert before Results to set original output element type if needed @@ -369,35 +373,34 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu const auto convert = std::make_shared( body_results[i]->get_input_node_shared_ptr(0), needed_out_type); body_results[i]->set_argument(0, convert); + body_results[i]->validate_and_infer_types(); } } // We should change existing element type to original for Parameters if needed - const auto& body_parameters = body_ptr()->get_parameters(); + const auto& parameters = body_ptr()->get_parameters(); for (size_t i = 0; i < inputShapes.size(); ++i) { const auto needed_in_type = std::get<2>(inputShapes[i]); - if (body_parameters[i]->get_element_type() != needed_in_type) { - body_parameters[i]->set_element_type(needed_in_type); - config.m_is_needed_to_align_precision = true; - } - } + const auto& parameter = parameters[i]; + if (parameter->get_element_type() != needed_in_type) { + const auto parameter_output = parameter->output(0); + const auto convert = std::make_shared( + parameter_output, + parameter_output.get_element_type()); + ngraph::copy_runtime_info(parameter, convert); - // We should align element type inside body using the corresponding pass: - // - Insert Convert before operations that doesn't support original element type for execution - // - Insert reverse Convert before operations that support original element type - // but have inputs that doesn't support it (because before them will be inserted Convert with exec_type - first point) - // - Then we should use ConstantFolding pass to convert element type of Scalars before inference. - // - Eliminate redundant Converts which can be inserted in AlignElementType() pass - ngraph::pass::Manager manager; - if (config.m_is_needed_to_align_precision) { - manager.register_pass(execution_element_type); - manager.register_pass(); - // TODO [100041] : In some cases AlignElementType pass can insert extra Convert because - // the pass doesn't know real precisions in real time. 
- // We call EliminateConverts pass to remove them - manager.register_pass(); + for (const auto input : parameter_output.get_target_inputs()) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } + + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + } } - manager.run_passes(body_ptr()); } void snippets::op::Subgraph::initialize_buffer_scratchpad_size() { @@ -593,24 +596,39 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, - ngraph::pass::Manager& opt, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params) { canonicalize(output_shapes, input_shapes); - return generate(opt, compile_params); + return generate(pre_dialect, post_dialect, post_precision, compile_params); } snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) { auto mngr = ngraph::pass::Manager(); - return generate(mngr, compile_params); + return generate(mngr, mngr, mngr, compile_params); } -snippets::Schedule snippets::op::Subgraph::generate(ngraph::pass::Manager& opt, const void* compile_params) { +snippets::Schedule snippets::op::Subgraph::generate( + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); + pre_dialect.run_passes(body_ptr()); convert_to_snippet_dialect(); - opt.run_passes(body_ptr()); + post_dialect.run_passes(body_ptr()); + + ngraph::pass::Manager precision_manager; + precision_manager.register_pass(m_generator->get_target_machine()); + precision_manager.register_pass(); + precision_manager.register_pass(); + precision_manager.run_passes(body_ptr()); + + post_precision.run_passes(body_ptr()); // After all passes, when all optimizations are completed and all MemoryAccess ops are inserted, // we can calculate common buffer scratchpad size and propagate offset from Buffer to the corresponding MemoryAccess ops diff --git a/src/common/snippets/src/pass/align_element_type.cpp b/src/common/snippets/src/pass/align_element_type.cpp deleted file mode 100644 index 54399e309f646c..00000000000000 --- a/src/common/snippets/src/pass/align_element_type.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "snippets/snippets_isa.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/align_element_type.hpp" -#include "snippets/utils.hpp" -#include "ov_ops/type_relaxed.hpp" -#include "ngraph/op/util/op_types.hpp" - -#include - -namespace { - -inline auto is_in_op(const std::shared_ptr& n) -> bool { - return ov::is_type(n) - || ov::is_type(n); -} - -// At the moment Subgraph supports only Eltwise, Select, Convert, Broadcast and FQ (which is decomposed into Eltwises and Convert) with -// Softmax (which is decomposed into Eltwises as well) -// And only Eltwise and Select ops supports execution only in "exec_type". 
So we can check op type from the opposite -// NOTE: This check is only for executable which isn't Parameter/Constant/Result -inline auto op_supports_only_exec_type(const std::shared_ptr& n) -> bool { - return !is_in_op(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n); -} - -} // namespace - -ngraph::snippets::pass::AlignElementType::AlignElementType(const ov::element::Type exec_type) : exec_type(exec_type) { } - -bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_ptr &m) { - RUN_ON_FUNCTION_SCOPE(AlignElementType); - - auto insertConvert = [](const std::shared_ptr& op, const size_t idx, const ov::element::Type& element_type) -> void { - auto convert = std::make_shared(op->input(idx).get_source_output(), element_type); - ngraph::copy_runtime_info(op->get_input_node_shared_ptr(idx), convert); - op->set_argument(idx, convert); - }; - - // NOTE: We don't call validate_and_infer_types() to avoid precision conflicts on inputs - bool rewritten = false; - auto ops = m->get_ordered_ops(); - for (auto& op : ops) { - if (is_in_op(op)) { - continue; - } - - if (op_supports_only_exec_type(op)) { - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - auto existing_convert = ov::as_type_ptr(shared_input); - // We should insert Convert before Ops, which supports only exec element type, only when: - // - Input is Convert with unsupported destination type - // - Input is Op which support any element type - // We couldn't unite these conditions and just check that element type isn't supported exec type - // because we don't call validate_and_infer_types() so we don't know new precisions after setting of original - // input and output element types - if ((existing_convert && existing_convert->get_destination_type() != exec_type) || - (!op_supports_only_exec_type(shared_input))) { - insertConvert(op, i, exec_type); - rewritten |= true; - } - } - if (auto tr_node = std::dynamic_pointer_cast(op)) { - tr_node->set_overridden_output_type(exec_type, 0); - rewritten |= true; - } - } else { // branch for Movement ops, MatMul ops in the future and for the Convert, Result - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - // it's original element type because we don't use validate_and_infer_type() anywhere - const auto original_eltype = op->input(i).get_element_type(); - // If before op there is another op that doesn't support execution on original element type, we know that - // before this op will be inserted reverse Convert to support execution on supported element type (first branch of condition). 
- // So we should return original element type for operations that can support low precision - if (op_supports_only_exec_type(shared_input) && original_eltype != exec_type) { - insertConvert(op, i, original_eltype); - rewritten |= true; - } - } - } - } - - return rewritten; -} - -bool ngraph::snippets::pass::AlignElementType::opNeedsAlignElementType(const std::shared_ptr& op, const ov::element::Type exec_type) { - // At the moment Snippets support only Eltwise/Convert/FQ/Select/Softmax/Broadcast which one output so we can just call get_element_type() - return op_supports_only_exec_type(op) && op->get_element_type() != exec_type; -} diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 00c7efc6d5529e..af962adaa64432 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -219,7 +219,11 @@ const std::set ngraph::snippets::pass::TokenizeSnippets:: { ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 }; bool TokenizeSnippets::AppropriateForSubgraph(const std::shared_ptr &node) { - return is_supported_op(node) && has_supported_in_out(node) && node->get_control_dependencies().empty(); + return + is_supported_op(node) && + has_supported_in_out(node) && + node->get_control_dependencies().empty() && + snippets::op::Subgraph::check_broadcast(node); } TokenizeSnippets::TokenizeSnippets() { diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 787fb8f650d5be..04278526ce8c34 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -17,7 +17,7 @@ #include "snippets/utils.hpp" #include "snippets/itt.hpp" -NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations"); namespace ngraph { namespace snippets { diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 5c2cfd6b0f82c3..9688e0a0e22940 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -36,11 +36,6 @@ bool isValidRangesInputs(const std::shared_ptr& fq }); } -bool is_scalar_constant(const std::shared_ptr& source_output_node) { - return ngraph::is_type(source_output_node) && - ngraph::shape_size(source_output_node->get_shape()) == 1; -} - } // namespace ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { @@ -182,13 +177,6 @@ ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { register_matcher(m, callback); } -bool ngraph::snippets::pass::FakeQuantizeDecomposition::isAllScalarConstant(const std::shared_ptr& node) { - return is_scalar_constant(node->get_input_node_shared_ptr(1)) && - is_scalar_constant(node->get_input_node_shared_ptr(2)) && - is_scalar_constant(node->get_input_node_shared_ptr(3)) && - is_scalar_constant(node->get_input_node_shared_ptr(4)); -} - bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts( const std::shared_ptr& fq_node, std::vector& cl, diff --git a/src/common/snippets/src/pass/propagate_precision.cpp b/src/common/snippets/src/pass/propagate_precision.cpp new file mode 100644 index 00000000000000..19be34b4e97648 --- /dev/null +++ 
b/src/common/snippets/src/pass/propagate_precision.cpp @@ -0,0 +1,293 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/propagate_precision.hpp" + +#include +#include +#include "ov_ops/type_relaxed.hpp" +#include "snippets/itt.hpp" +#include "ngraph/rt_info.hpp" + +using namespace ngraph; + +ngraph::snippets::pass::PropagatePrecision::PropagatePrecision( + const std::shared_ptr& target_machine) : target_machine(target_machine) { +} + +bool ngraph::snippets::pass::PropagatePrecision::run_on_model(const std::shared_ptr& f) { + RUN_ON_MODEL_SCOPE(PropagatePrecision); + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::PropagatePrecision") + + std::unordered_map, element::Type> result_types; + auto results = f->get_results(); + for (auto& result : results) { + result_types.emplace(result, result->get_input_element_type(0)); + } + + bool was_updated = true; + for (const auto& op : f->get_ordered_ops()) { + auto type_info = op->get_type_info(); + OPENVINO_ASSERT( + target_machine->has(type_info), + "operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "' was not found in target machine"); + + auto exec = target_machine->get_supported_precisions(type_info); + const auto supported_precisions = exec(op); + if (supported_precisions.empty()) { + continue; + } + + // There are two operation types which break precision propagation: + // 1) Existing convertion operations. Solution: remove convertion + // operation before general algo + // 2) Type relaxed based operations. Will be resolved by snippet opset. + + auto input_precisions_were_changed = false; + + for (const auto& input : op->inputs()) { + const auto convert = ngraph::as_type(input.get_source_output().get_node()); + if (convert == nullptr) { + continue; + } + + const auto precision_before = convert->get_input_element_type(0); + const auto precision_after = convert->get_output_element_type(0); + if (can_be_removed(precision_before, precision_after, precision_before)) { + op->set_argument(input.get_index(), convert->input(0).get_source_output()); + input_precisions_were_changed = true; + } + } + + std::vector input_precisions; + for (const auto& input : op->inputs()) { + const auto input_precision = input.get_source_output().get_element_type(); + input_precisions.push_back(input_precision); + } + + assert(std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions.size() == input_precisions.size(); + }) && "input precisions count is not equal for supported precisions"); + + // update input precisions + // if possible then convert precisions to supported + if (!supported_precisions.empty() && + std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions != input_precisions; + })) { + auto precisions = get_precisions(input_precisions, + supported_precisions); + OPENVINO_ASSERT( + !precisions.empty(), + "there are no supported precisions for operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "'"); + + auto find_convert = []( + const ngraph::Output parent_output, + const ngraph::element::Type convert_type) -> snippets::op::ConvertSaturation* { + for (const auto& input : parent_output.get_target_inputs()) { + const auto child = ngraph::as_type(input.get_node()); + if ((child != nullptr) && 
(child->get_output_element_type(0) == convert_type)) { + return child; + } + } + return nullptr; + }; + + for (size_t i = 0; i < op->get_input_size(); ++i) { + const auto& op_input = op->input(i); + const auto& required_after = precisions[i]; + auto parent_output = op_input.get_source_output(); + const auto actual_before = parent_output.get_element_type(); + if (actual_before != required_after) { + was_updated = true; + input_precisions_were_changed = true; + auto existing_convert = ngraph::as_type( + parent_output.get_node()); + + if (existing_convert == nullptr) { + existing_convert = find_convert(parent_output, required_after); + if (existing_convert != nullptr) { + // reuse existing convert + op->set_argument(op_input.get_index(), existing_convert->shared_from_this()); + continue; + } + } + + if (existing_convert == nullptr) { + // create new Convert + auto convert = std::make_shared( + parent_output, + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + const auto actual_before = existing_convert->get_input_element_type(0); + const auto actual_after = existing_convert->get_output_element_type(0); + + if (can_be_removed(actual_before, actual_after, required_after)) { + // remove existing convert + existing_convert->output(0).replace(parent_output); + continue; + } + + if (can_be_fused(actual_after, required_after)) { + // fuse existing convert + auto convert = std::make_shared( + existing_convert->get_input_node_shared_ptr(0), + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + // create new convert + auto convert = std::make_shared( + existing_convert->output(0), + required_after); + ngraph::copy_runtime_info(existing_convert->output(0).get_node()->shared_from_this(), convert); + op->set_argument(op_input.get_index(), convert); + } + } + } + + auto type_relaxed_node = std::dynamic_pointer_cast(op); + if (input_precisions_were_changed || (type_relaxed_node != nullptr)) { + // update output precision + std::vector op_output_types; + for (auto& output : op->outputs()) { + op_output_types.push_back(output.get_element_type()); + } + + if (type_relaxed_node != nullptr) { + // TODO: user story 104284 + // to keep previous functionality + // unary and binary element-wise operations are supported + // will be replaced to snippets opset later + const auto op_element_type = op->get_input_element_type(0); + if (type_relaxed_node->get_overridden_output_type(0) != op_element_type) { + was_updated = true; + OPENVINO_ASSERT(op->get_output_size() == 1ull, "operation with several output is not supported"); + + type_relaxed_node->set_overridden_output_type(op_element_type, 0); + op->validate_and_infer_types(); + } + } else { + op->validate_and_infer_types(); + } + + for (size_t i = 0; i < op->get_output_size(); ++i) { + auto output = op->output(i); + + if (output.get_element_type() != op_output_types[i]) { + was_updated = true; + auto convert = std::make_shared( + output, + op_output_types[i]); + ngraph::copy_runtime_info(output.get_node_shared_ptr(), convert); + + for (auto& input : output.get_target_inputs()) { + auto child = input.get_node(); + if (child == convert.get()) { + continue; + } + + input.replace_source_output(convert->output(0)); + + + if (ngraph::is_type(input.get_node())) { + input.get_tensor_ptr()->add_names(output.get_tensor_ptr()->get_names()); + + const 
std::string original_name = op->get_friendly_name(); + op->set_friendly_name(original_name + "_original"); + convert->set_friendly_name(original_name); + } + } + output.get_tensor_ptr()->set_names({}); + } + } + } + } + + for (auto it = result_types.begin(); it != result_types.end(); ++it) { + const auto result = it->first; + const auto actual_type = result->get_input_element_type(0); + const auto expected_type = it->second; + if (actual_type != it->second) { + was_updated = true; + auto convert = std::make_shared( + result->get_input_node_shared_ptr(0), + expected_type); + ngraph::copy_runtime_info(result->get_input_node_shared_ptr(0), convert); + result->set_argument(0, convert); + } + } + + return was_updated; +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept { + if (actual_before != required_after) { + return false; + } + + return can_be_fused(actual_after, actual_before); +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept { + // custom conditions: between int & float precisions + if (((actual == element::bf16) || (actual == element::f16) || (actual == element::f32)) && + ((required == element::u8) || (required == element::i8))) { + return true; + } + + if ((actual == element::f32) && ((required == element::u16) || (required == element::i16))) { + return true; + } + + // general conditions: any new added precision will support + return + (actual.is_real() == required.is_real()) && + (actual.bitwidth() >= required.bitwidth()); +} + +std::vector ngraph::snippets::pass::PropagatePrecision::get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions_pack) noexcept { + bool was_found = false; + for (const auto& supported_precisions : supported_precisions_pack) { + for (size_t i = 0; i < supported_precisions.size(); ++i) { + const auto& supported_precision = supported_precisions[i]; + const auto& input_precision = input_precisions[i]; + if ((supported_precision.is_real() != input_precision.is_real()) || + (input_precision.bitwidth() > supported_precision.bitwidth())) { + was_found = false; + break; + } + + was_found = true; + } + if (was_found) { + return supported_precisions; + } + } + + if (!supported_precisions_pack.empty()) { + return *supported_precisions_pack.begin(); + } + + return {}; +} diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index 4fd06f760f3207..7dfa71a4b6a7f7 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -16,7 +16,7 @@ using BlockedShapeVector = ngraph::snippets::op::Subgraph::BlockedShapeVector; class DummyEmitter : public ngraph::snippets::Emitter { public: // Here I pass Add to Emitter, but could be any other op, since it's ignored anyway. 
- DummyEmitter() : ngraph::snippets::Emitter(std::make_shared()) {} + DummyEmitter(const std::vector& custom_opset = {}) : ngraph::snippets::Emitter(std::make_shared()) {} void emit_code(const std::vector&, const std::vector&, const std::vector&, @@ -52,7 +52,9 @@ class LoweringTests : public TransformationTestsF { static std::shared_ptr getSubgraph(const std::shared_ptr& f); static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations = {}, + ov::pass::Manager pre_dialect = {}, + ov::pass::Manager post_dialect = {}, + ov::pass::Manager post_precision = {}, const std::shared_ptr generator = nullptr); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); ov::PartialShape master_shape{}; diff --git a/src/common/snippets/tests/include/pass/precision_propagation.hpp b/src/common/snippets/tests/include/pass/precision_propagation.hpp new file mode 100644 index 00000000000000..a60b9161ab4fc4 --- /dev/null +++ b/src/common/snippets/tests/include/pass/precision_propagation.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "lowering_utils.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationParamsValues { +public: + class Actual { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + std::set> op1_supported_precisions; + std::set> op2_supported_precisions; + }; + + class Expected { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + element::Type convertion_after_op2; + }; + + std::vector input_types; + Actual actual; + Expected expected; +}; + +typedef std::tuple< + std::pair, // input shapes + PrecisionPropagationParamsValues +> PrecisionPropagationParams; + +class PrecisionPropagationTest : public TransformationTestsF, + public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + std::shared_ptr snippets_function; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index a536a0317eae12..55480e95dae510 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -11,10 +11,12 @@ namespace ov { namespace test { namespace snippets { -DummyTargetMachine::DummyTargetMachine(const std::vector& custom_opset) { - auto dummy_functor = [](const std::shared_ptr& n) { - return std::make_shared(); +DummyTargetMachine::DummyTargetMachine(const std::vector&custom_opset) { + auto dummy_functor = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[op::v0::Parameter::get_type_info_static()] = dummy_functor; jitters[op::v0::Constant::get_type_info_static()] = dummy_functor; jitters[op::v0::Result::get_type_info_static()] = dummy_functor; @@ -97,7 +99,9 @@ std::shared_ptr LoweringTests::getSubgraph(const std::shared_ptr LoweringTests::getLoweredSubgraph(const std::shared_ptr &f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations, + ov::pass::Manager pre_dialect, + ov::pass::Manager post_dialect, + ov::pass::Manager 
post_precision, const std::shared_ptr generator) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? std::make_shared() : generator); @@ -119,7 +123,7 @@ std::shared_ptr LoweringTests::getLoweredSubgrap } body_rt_info["PluginShapesOverride"] = new_shapes; subgraph->set_tile_rank(2); - subgraph->generate(target_optimizations); + subgraph->generate(pre_dialect, post_precision, post_precision); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/precision_propagation.cpp b/src/common/snippets/tests/src/pass/precision_propagation.cpp new file mode 100644 index 00000000000000..3c7da4d06aa165 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation.cpp @@ -0,0 +1,294 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "pass/precision_propagation.hpp" + +#include +#include "ngraph/pass/validate.hpp" +#include "snippets/pass/propagate_precision.hpp" +#include "snippets/op/convert_saturation.hpp" +#include "common_test_utils/common_utils.hpp" +#include "precision_propagation_function.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +class DummyPrecisionPropagationTargetMachine : public DummyTargetMachine { +public: + DummyPrecisionPropagationTargetMachine( + const std::set>& op1_supported_precisions, + const std::set>& op2_supported_precisions) + : DummyTargetMachine() { + jitters[DummyAdd::get_type_info_static()] = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op1_supported_precisions](const std::shared_ptr& n) { return op1_supported_precisions; }}; + jitters[op::v1::Maximum::get_type_info_static()] = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op2_supported_precisions](const std::shared_ptr&n) { return op2_supported_precisions; }}; + + auto default_jitter = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[ngraph::snippets::op::ConvertSaturation::get_type_info_static()] = default_jitter; + } +}; + +} // namespace + +std::string PrecisionPropagationTest::getTestCaseName(testing::TestParamInfo obj) { + std::pair shapes; + PrecisionPropagationParamsValues test_values; + std::tie(shapes, test_values) = obj.param; + + auto to_string = [](const std::set>& precisions_pack) noexcept { + std::ostringstream result; + result << "{"; + for (const auto& precisions : precisions_pack) { + result << CommonTestUtils::vec2str(precisions) << "_"; + } + result << "}"; + return result.str(); + }; + + std::ostringstream result; + result << "IN0_" << shapes.first << "_" << test_values.input_types[0] << "_" + << "IN1_" << shapes.second << "_" << test_values.input_types[1] << "_" + << "IN2_" << test_values.input_types[2] + << to_string(test_values.actual.op1_supported_precisions) << "_" + << to_string(test_values.actual.op2_supported_precisions) << "_" + << test_values.expected.convertion_before_op1.first << "_" << test_values.expected.convertion_before_op1.second << "_" + << test_values.expected.convertion_before_op2_1 << "_" + << test_values.expected.convertion_before_op2_2.first << "_" << test_values.expected.convertion_before_op2_2.second << "_" + << test_values.expected.convertion_after_op2 << "_"; + return result.str(); +} + +TEST_P(PrecisionPropagationTest, CompareFunctions) { + disable_rt_info_check(); + + const auto param = GetParam(); + const 
auto shapes = std::get<0>(param); + const auto test_values = std::get<1>(param); + + const auto input_shapes = std::vector({ shapes.first, shapes.second }); + PrecisionPropagationAddFunction function_stub( + input_shapes, + test_values.input_types[0], + test_values.input_types[1], + test_values.input_types[2], + { + test_values.actual.convertion_before_op1, + test_values.actual.convertion_before_op2_1, + test_values.actual.convertion_before_op2_2 + }, + { + test_values.expected.convertion_before_op1, + test_values.expected.convertion_before_op2_1, + test_values.expected.convertion_before_op2_2, + test_values.expected.convertion_after_op2 + }); + function = function_stub.getOriginal(); + + const auto target_machine = std::make_shared( + test_values.actual.op1_supported_precisions, + test_values.actual.op2_supported_precisions); + + manager.register_pass(target_machine); + + function_ref = function_stub.getReference(); +} + +namespace PrecisionPropagationTestInstantiation { +// clang-format off + +std::vector> shapes { + {{1, 3, 16, 16}, {1, 3, 16, 16}} +}; + +std::vector test_cases { + { + {element::f32, element::f32, element::f32}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, element::f32}} + }, + {} + }, + // in: Parameter I8 => Op1 I32 => Convert I8 => Op1 I8 => Result + // out: Parameter I8 => Add I32 => Convert I8 => Convert FP32 => Op1 FP32 => Result + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::f32, element::f32}} + }, + { + {}, + element::i8, + {element::f32, element::f32}, + {element::i8} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i8, element::i8}} + }, + { + {}, + {}, + {element::i8, element::undefined}, + {} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i32, element::i32}} + }, + { + {}, + {element::i8}, + {element::i32, element::i32}, + {element::i8} + } + }, + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::i8, element::i8} + }, + { + {element::f32, element::f32}, + {element::i32, element::i32} + } + }, + { + {element::f32, element::f32}, + {}, + {}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::undefined}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::f32}, + {} + } + }, + // propagate precision via both operations + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + } + }, + { + {}, + {}, + {}, + {element::f32} + } + }, + { + {element::bf16, element::bf16, element::bf16}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, 
element::f32}} + }, + { + {{element::f32}, {element::f32}}, + {element::bf16}, + {{element::f32}, {element::f32}}, + {element::bf16} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_PrecisionPropagationTest, + PrecisionPropagationTest, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(test_cases)), + PrecisionPropagationTest::getTestCaseName); + +// clang-format on +} // namespace PrecisionPropagationTestInstantiation + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp new file mode 100644 index 00000000000000..cc6c113cc3f671 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationConvertTest : public testing::Test {}; + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_fused) { + const std::set> precisions_set = { + {element::u64, element::u64}, + {element::u64, element::u32}, + {element::u64, element::u16}, + {element::u64, element::u8}, + {element::u32, element::u32}, + {element::u32, element::u16}, + {element::u32, element::u8}, + {element::u16, element::u16}, + {element::u16, element::u8}, + {element::u8, element::u8}, + + {element::i64, element::i64}, + {element::i64, element::i32}, + {element::i64, element::i16}, + {element::i64, element::i8}, + {element::i32, element::i32}, + {element::i32, element::i16}, + {element::i32, element::i8}, + {element::i16, element::i16}, + {element::i16, element::i8}, + {element::i8, element::i8}, + + {element::f64, element::f64}, + {element::f64, element::f32}, + {element::f64, element::f16}, + {element::f32, element::f32}, + {element::f32, element::f16}, + {element::f16, element::f16}, + + {element::f32, element::bf16}, + {element::bf16, element::bf16}, + {element::f32, element::i8}, + {element::f16, element::i8}, + {element::bf16, element::i8}, + {element::f32, element::u8}, + {element::f16, element::u8}, + {element::bf16, element::u8} + }; + + for (const auto& precisions : precisions_set) { + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can replace " << precisions.first; + + if (precisions.first == precisions.second) { + continue; + } + + ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.second, + precisions.first)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_not_be_fused) { + const std::set> precisions_set = { + {element::i64, element::f32}, + {element::i64, element::f16}, + {element::i64, element::bf16}, + + {element::i32, element::f32}, + {element::i32, element::f16}, + {element::i32, element::bf16}, + + {element::i16, element::f16}, + {element::i16, element::bf16}, + + {element::u64, element::f32}, + {element::u64, element::f16}, + {element::u64, element::bf16}, + + {element::u32, element::f32}, + {element::u32, element::f16}, + {element::u32, element::bf16}, + + {element::u16, element::f16}, + {element::u16, element::bf16} + }; + + for (const auto& precisions : precisions_set) { + 
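+ // The pairs above mix integral and floating-point element types; the pass is expected to refuse to
+ // fuse such Converts, i.e. can_be_fused(actual, required) must return false for every entry.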
ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_removed) { + const std::set> precisions_set = { + {element::u64, element::u64, element::u64}, + {element::u32, element::u64, element::u32}, + {element::u16, element::u64, element::u16}, + {element::u8, element::u64, element::u8}, + {element::u32, element::u32, element::u32}, + {element::u16, element::u32, element::u16}, + {element::u8, element::u32, element::u8}, + {element::u16, element::u16, element::u16}, + {element::u8, element::u16, element::u8}, + {element::u8, element::u8, element::u8}, + + {element::i64, element::i64, element::i64}, + {element::i32, element::i64, element::i32}, + {element::i16, element::i64, element::i16}, + {element::i8, element::i64, element::i8}, + {element::i32, element::i32, element::i32}, + {element::i16, element::i32, element::i16}, + {element::i8, element::i32, element::i8}, + {element::i16, element::i16, element::i16}, + {element::i8, element::i16, element::i8}, + {element::i8, element::i8, element::i8}, + + {element::f64, element::f64, element::f64}, + {element::f32, element::f64, element::f32}, + {element::f16, element::f64, element::f16}, + {element::f32, element::f32, element::f32}, + {element::f16, element::f16, element::f16}, + + {element::bf16, element::f32, element::bf16}, + {element::bf16, element::bf16, element::bf16}, + }; + + for (const auto& precisions : precisions_set) { + const auto actual_before = std::get<0>(precisions); + const auto actual_after = std::get<1>(precisions); + const auto required_after = std::get<2>(precisions); + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_removed( + actual_before, + actual_after, + required_after)) << "can_be_removed: " << actual_before << " => " << actual_after << " => " << required_after; + + if ((actual_before == actual_after) && (actual_before == required_after)) { + continue; + } + } +} + +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp new file mode 100644 index 00000000000000..9e97fcc8ad4aa1 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + + +class PrecisionPropagationGetPrecisionsTest : public testing::Test {}; + +TEST_F(PrecisionPropagationGetPrecisionsTest, empty) { + ASSERT_EQ(std::vector{}, ngraph::snippets::pass::PropagatePrecision::get_precisions({}, {})); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, selected) { + ASSERT_EQ( + std::vector({element::f32, element::f32}), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::f32, element::f32 }, + { + {element::bf16, element::bf16}, + {element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, first) { + ASSERT_EQ( + std::vector({ element::bf16, element::bf16 }), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::i32, element::i32 }, + { + {element::bf16, element::bf16}, + 
{element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp index 0c4f38628dce1b..5da183db15a240 100644 --- a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp +++ b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp @@ -29,14 +29,9 @@ namespace internal { template class NmsStaticShapeIE : public BaseNmsOp { public: - OPENVINO_SUPPRESS_DEPRECATED_START // TODO: it should be std::string("NmsStaticShapeIE_") + BaseNmsOp::get_type_info_static().name, // but currently it does not pass conversion to Legacy Opset correctly - OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, - "ie_internal_opset", - BaseNmsOp, - BaseNmsOp::get_type_info_static().version); - OPENVINO_SUPPRESS_DEPRECATED_END + OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, "ie_internal_opset", BaseNmsOp); NmsStaticShapeIE() = default; diff --git a/src/common/transformations/include/ov_ops/type_relaxed.hpp b/src/common/transformations/include/ov_ops/type_relaxed.hpp index 47a791a4b0be42..fa84ab4eb7c945 100644 --- a/src/common/transformations/include/ov_ops/type_relaxed.hpp +++ b/src/common/transformations/include/ov_ops/type_relaxed.hpp @@ -189,10 +189,7 @@ OPENVINO_SUPPRESS_DEPRECATED_START template class TypeRelaxed : public BaseOp, public TypeRelaxedBase { public: - OPENVINO_OP(BaseOp::get_type_info_static().name, - BaseOp::get_type_info_static().version_id, - BaseOp, - BaseOp::get_type_info_static().version); + OPENVINO_OP(BaseOp::get_type_info_static().name, BaseOp::get_type_info_static().version_id, BaseOp); using BaseOp::BaseOp; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp new file mode 100644 index 00000000000000..b112c5d8abdf45 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace pass { +/** + * @ingroup ie_transformation_common_api + * @brief Converts Interpolate version 11 to Interpolate version 4 if the new op uses any of the v4 allowed + * interpolation modes. 
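+ * The original scales or sizes input (selected by shape_calculation_mode) is kept and a one-element constant
+ * is created for the other v4 input; v11-only modes such as BILINEAR_PILLOW and BICUBIC_PILLOW are not converted.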
+ */ +class TRANSFORMATIONS_API ConvertInterpolate11ToInterpolate4 : public MatcherPass { +public: + OPENVINO_RTTI("ConvertInterpolate11ToInterpolate4", "0"); + ConvertInterpolate11ToInterpolate4(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 8b43dcfc8d2b29..6064effe880c4b 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -76,6 +76,7 @@ #include "transformations/op_conversions/convert_gather_downgrade.hpp" #include "transformations/op_conversions/convert_gather_upgrade.hpp" #include "transformations/op_conversions/convert_gelu.hpp" +#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" #include "transformations/op_conversions/convert_maxpool_downgrade.hpp" #include "transformations/op_conversions/convert_maxpool_upgrade.hpp" @@ -211,6 +212,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion) diff --git a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp index f01c60b1c0389b..ca03c288092260 100644 --- a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp +++ b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp @@ -36,14 +36,28 @@ ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() { bool has_subnormals = false; for (size_t i = 0; i < size; ++i) { if (fpclassify(std::abs(data[i])) == FP_SUBNORMAL) { - data[i] = 0.0f; has_subnormals = true; + break; } } - if (has_subnormals) - return true; + if (!has_subnormals) + return false; + + auto new_constant = std::make_shared(ov::element::f32, node->get_shape()); + auto* dst_data = const_cast(new_constant->get_data_ptr()); + + for (size_t i = 0; i < size; ++i) { + if (fpclassify(std::abs(data[i])) != FP_SUBNORMAL) + dst_data[i] = data[i]; + else + dst_data[i] = 0.0f; + } + + new_constant->set_friendly_name(node->get_friendly_name()); + ov::copy_runtime_info(node, new_constant); + ov::replace_node(node, new_constant); - return false; + return true; }; auto m = make_shared(node_pattern, matcher_name); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp new file mode 100644 index 00000000000000..c9b2e15dd4cfaf --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" + +#include +#include +#include +#include +#include + +#include "itt.hpp" + +ov::pass::ConvertInterpolate11ToInterpolate4::ConvertInterpolate11ToInterpolate4() { + MATCHER_SCOPE(ConvertInterpolate11ToInterpolate4); + + const auto interpolate_v11_pattern = pattern::wrap_type(); + + const matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto 
v4_compatible_interpolation_mode = [](const op::util::InterpolateBase::InterpolateMode mode) { + constexpr std::array allowed_modes = { + op::util::InterpolateBase::InterpolateMode::NEAREST, + op::util::InterpolateBase::InterpolateMode::LINEAR, + op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX, + op::util::InterpolateBase::InterpolateMode::CUBIC}; + + return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes); + }; + + const auto interpolate_v11 = std::dynamic_pointer_cast(m.get_match_root()); + if (!interpolate_v11 || !v4_compatible_interpolation_mode(interpolate_v11->get_attrs().mode) || + transformation_callback(interpolate_v11)) { + return false; + } + + // downgrade only if the interpolation mode used to create v11 is supported by v4 + std::shared_ptr interpolate_v4; + ov::Output v4_input_output_shape; + ov::Output v4_input_scales; + + if (interpolate_v11->get_attrs().shape_calculation_mode == + ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) { + v4_input_scales = interpolate_v11->input_value(1); + v4_input_output_shape = opset4::Constant::create(element::i32, Shape{1}, {1}); + copy_runtime_info(interpolate_v11, v4_input_output_shape.get_node_shared_ptr()); + } else { + v4_input_output_shape = interpolate_v11->input_value(1); + v4_input_scales = opset4::Constant::create(element::f32, Shape{1}, {1.0f}); + copy_runtime_info(interpolate_v11, v4_input_scales.get_node_shared_ptr()); + } + + if (interpolate_v11->get_input_size() == 3) { // with axes input + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->input_value(2), + interpolate_v11->get_attrs()); + } else { + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->get_attrs()); + } + + interpolate_v4->set_friendly_name(interpolate_v11->get_friendly_name()); + copy_runtime_info(interpolate_v11, interpolate_v4); + replace_node(interpolate_v11, interpolate_v4); + + return true; + }; + + auto m = std::make_shared(interpolate_v11_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp new file mode 100644 index 00000000000000..7504cd378ebba6 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +namespace { +constexpr bool WITH_AXES = true; +constexpr bool WITHOUT_AXES = false; + +std::shared_ptr create_v11_model(const bool with_axes, + const ov::opset11::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr scales_or_sizes; + std::shared_ptr interpolate; + + const size_t num_scales_or_sizes = with_axes ? 
2 : 4; + if (shape_calc_mode == ov::opset11::Interpolate::ShapeCalcMode::SCALES) { + scales_or_sizes = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + } else { + scales_or_sizes = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + } + + ov::ParameterVector model_params; + model_params.push_back(input); + model_params.push_back(scales_or_sizes); + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, scales_or_sizes, axes, attributes); + } else { + interpolate = std::make_shared(input, scales_or_sizes, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +std::shared_ptr create_v4_model(const bool with_axes, + const ov::opset4::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset4::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr output_shape; + std::shared_ptr scales; + std::shared_ptr interpolate; + + ov::ParameterVector model_params; + model_params.push_back(input); + + const size_t num_scales_or_sizes = with_axes ? 2 : 4; + if (shape_calc_mode == ov::opset4::Interpolate::ShapeCalcMode::SCALES) { + scales = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(scales)); + output_shape = ov::opset4::Constant::create(ov::element::i32, ov::Shape{1}, {1}); + + } else { + output_shape = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(output_shape)); + scales = ov::opset4::Constant::create(ov::element::f32, ov::Shape{1}, {1.0f}); + } + + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, output_shape, scales, axes, attributes); + } else { + interpolate = std::make_shared(input, output_shape, scales, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITHOUT_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITHOUT_AXES, 
ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +namespace { +std::shared_ptr create_non_downgradeable_model(const ov::opset11::Interpolate::InterpolateMode mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.mode = mode; + attributes.shape_calculation_mode = ov::opset11::Interpolate::ShapeCalcMode::SCALES; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + const auto scales = std::make_shared(ov::element::f32, ov::Shape{2}); + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + + const auto interpolate = std::make_shared(input, scales, axes, attributes); + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), ov::ParameterVector{input, scales, axes}); +} +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bicubic_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BICUBIC_PILLOW); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bilinear_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BILINEAR_PILLOW); + manager.register_pass(); +} diff --git a/src/common/transformations/tests/utils/compare_functions_test.cpp b/src/common/transformations/tests/utils/compare_functions_test.cpp index f27b30f18b933b..c89eddb97a4611 100644 --- a/src/common/transformations/tests/utils/compare_functions_test.cpp +++ b/src/common/transformations/tests/utils/compare_functions_test.cpp @@ -201,7 +201,7 @@ TEST(TransformationTests, CompareFunctoinsTINegative) { const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); auto res = fc(f, f_ref); EXPECT_FALSE(res.valid); - EXPECT_THAT(res.message, HasSubstr("LSTMCell/4 != Relu/0")); + EXPECT_THAT(res.message, HasSubstr("LSTMCell/opset4 != Relu/opset1")); } TEST(TransformationTests, CompareFunctoinsTINegativeDifferentElementTypeBetweenSubGraphsInputs) { @@ -514,7 +514,7 @@ class DummyConstant : public ngraph::op::Op { DummyConstant& operator=(const DummyConstant&) = delete; const NodeTypeInfo& get_type_info() const override { - static const NodeTypeInfo type_info{typeid(this).name(), static_cast(0)}; + static const NodeTypeInfo type_info{typeid(this).name(), "0"}; return type_info; } diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index a589c283390761..4b538aab7c7461 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -24,6 +24,10 @@ endif() # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj +set_source_files_properties( + "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp" + PROPERTIES COMPILE_DEFINITIONS OpenVINO_VERSION="${OpenVINO_VERSION}") + source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) diff --git a/src/common/util/include/openvino/util/file_util.hpp b/src/common/util/include/openvino/util/file_util.hpp index 00d8dbe073cd61..ccf8ed4e46c6e0 100644 --- a/src/common/util/include/openvino/util/file_util.hpp +++ b/src/common/util/include/openvino/util/file_util.hpp @@ -260,6 +260,14 @@ inline std::basic_string make_plugin_library_name(const std::basic_string& */ FilePath get_plugin_path(const std::string& plugin); +/** + * @brief Find the plugins which are located together with OV library + * @param plugin - 
Path (absolute or relative) or name of a plugin. Depending on platform, `plugin` is wrapped with + * shared library suffix and prefix to identify library full name + * @return absolute path or file name with extension (to be found in ENV) + */ +FilePath get_compiled_plugin_path(const std::string& plugin); + /** * @brief Format plugin path (canonicalize, complete to absolute or complete to file name) for further * dynamic loading by OS diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index bcbd3fe2f906c7..0de2ab70d377b4 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -504,6 +504,37 @@ ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin) { return ov::util::to_file_path(lib_name); } +ov::util::FilePath ov::util::get_compiled_plugin_path(const std::string& plugin) { + const auto ov_library_path = get_ov_lib_path(); + + // plugin can be found either: + + // 1. in openvino-X.Y.Z folder relative to libopenvino.so + std::ostringstream str; + str << "openvino-" << OpenVINO_VERSION; + const auto sub_folder = str.str(); + + std::string abs_file_path = ov::util::path_join({ov_library_path, sub_folder, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + // 2. in the openvino.so location + abs_file_path = ov::util::path_join({ov_library_path, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + auto lib_name = plugin; + // For 3rd case - convert to 4th case + if (!ov::util::ends_with(plugin, ov::util::FileTraits::library_ext())) + lib_name = ov::util::make_plugin_library_name({}, plugin); + + // For 4th case + auto lib_path = ov::util::to_file_path(ov::util::get_absolute_file_path(lib_name)); + if (ov::util::file_exists(lib_path)) + return lib_path; + return ov::util::to_file_path(lib_name); +} + ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin, const std::string& xml_path, bool as_abs_only) { // Assume `plugin` (from XML "location" record) contains only: // 1. 
/path/to/libexample.so absolute path diff --git a/src/core/dev_api/openvino/runtime/itensor.hpp b/src/core/dev_api/openvino/runtime/itensor.hpp index 932b7d41627f2c..c66c2a15197f2e 100644 --- a/src/core/dev_api/openvino/runtime/itensor.hpp +++ b/src/core/dev_api/openvino/runtime/itensor.hpp @@ -26,12 +26,12 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this { /** * @return A tensor element type */ - virtual const element::Type& get_element_type() const = 0; + virtual const ov::element::Type& get_element_type() const = 0; /** * @return A tensor shape */ - virtual const Shape& get_shape() const = 0; + virtual const ov::Shape& get_shape() const = 0; /** * @brief Returns the total number of elements (a product of all the dims or 1 for scalar) @@ -48,7 +48,7 @@ class OPENVINO_API ITensor : public std::enable_shared_from_this { /** * @return Tensor's strides in bytes */ - virtual const Strides& get_strides() const = 0; + virtual const ov::Strides& get_strides() const = 0; /** * @brief Provides an access to the underlaying host memory diff --git a/src/core/dev_api/tensor_conversion_util.hpp b/src/core/dev_api/tensor_conversion_util.hpp index 2bc1a6ce551d6b..47a906ff9f0235 100644 --- a/src/core/dev_api/tensor_conversion_util.hpp +++ b/src/core/dev_api/tensor_conversion_util.hpp @@ -39,7 +39,7 @@ OPENVINO_DEPRECATED("This function is deprecated and will be removed soon.") OPENVINO_API TensorVector wrap_tensors(const std::vector& tensors); /** - * @brief Update output host tensors if they got dynamic shapee before evaluation (not allocated). + * @brief Update output host tensors if they got dynamic shape before evaluation (not allocated). * * Other tensor not requires update as they are created from outputs and points to same data blob. * diff --git a/src/core/include/ngraph/node.hpp b/src/core/include/ngraph/node.hpp index 776dcb6bebc653..76090d3cca32b1 100644 --- a/src/core/include/ngraph/node.hpp +++ b/src/core/include/ngraph/node.hpp @@ -150,21 +150,19 @@ using ov::check_new_args_count; } #endif -#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX, PARENT_CLASS) \ - const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, \ - static_cast(_VERSION_INDEX), \ - &PARENT_CLASS::get_type_info_static()}; \ - return type_info_static; \ - } \ - _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) - -#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX) \ +#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, PARENT_CLASS) \ const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, static_cast(_VERSION_INDEX)}; \ + static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, &PARENT_CLASS::get_type_info_static()}; \ return type_info_static; \ } \ _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) + +#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME) \ + const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ + static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME}; \ + return type_info_static; \ + } \ + _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) #define NGRAPH_RTTI_DEFINITION(...) 
\ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ _NGRAPH_RTTI_DEFINITION_WITH_PARENT, \ diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index 805fd808516d0e..0e070f3a0e7c71 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -28,6 +28,9 @@ namespace ov { class Plugin; /** @cond INTERNAL */ class Any; + +using AnyMap = std::map; + namespace util { OPENVINO_API bool equal(std::type_index lhs, std::type_index rhs); @@ -126,6 +129,11 @@ struct OPENVINO_API Read> { void operator()(std::istream& is, std::tuple& tuple) const; }; +template <> +struct OPENVINO_API Read { + void operator()(std::istream& is, AnyMap& map) const; +}; + template auto from_string(const std::string& str) -> const typename std::enable_if::value, T>::type& { @@ -210,14 +218,36 @@ struct Read< std::map, typename std::enable_if::value && std::is_default_constructible::value>::type> { void operator()(std::istream& is, std::map& map) const { - while (is.good()) { - std::string str; - is >> str; - auto k = from_string(str); - is >> str; - auto v = from_string(str); - map.emplace(std::move(k), std::move(v)); + char c; + + is >> c; + OPENVINO_ASSERT(c == '{', "Failed to parse std::map. Starting symbols is not '{', it's ", c); + + while (c != '}') { + std::string key, value; + std::getline(is, key, ':'); + size_t enclosed_container_level = 0; + + while (is.good()) { + is >> c; + if (c == ',') { // delimiter between map's pairs + if (enclosed_container_level == 0) // we should interrupt after delimiter + break; + } + if (c == '{' || c == '[') // case of enclosed maps / arrays + ++enclosed_container_level; + if (c == '}' || c == ']') { + if (enclosed_container_level == 0) + break; // end of map + --enclosed_container_level; + } + + value += c; // accumulate current value + } + map.emplace(from_string(key), from_string(value)); } + + OPENVINO_ASSERT(c == '}', "Failed to parse std::map. 
Ending symbols is not '}', it's ", c); } }; @@ -322,14 +352,14 @@ struct Write> { void operator()(std::ostream& os, const std::map& map) const { if (!map.empty()) { std::size_t i = 0; + os << '{'; for (auto&& v : map) { - os << to_string(v.first); - os << ' '; - os << to_string(v.second); + os << to_string(v.first) << ':' << to_string(v.second); if (i < (map.size() - 1)) - os << ' '; + os << ','; ++i; } + os << '}'; } } }; @@ -914,8 +944,6 @@ class OPENVINO_API Any { const void* addressof() const; }; -using AnyMap = std::map; - using RTMap = AnyMap; using AnyVector = std::vector; diff --git a/src/core/include/openvino/core/model.hpp b/src/core/include/openvino/core/model.hpp index 159a2c57533a77..e5d0158e538ff4 100644 --- a/src/core/include/openvino/core/model.hpp +++ b/src/core/include/openvino/core/model.hpp @@ -47,7 +47,7 @@ class OPENVINO_API Model : public std::enable_shared_from_this { public: _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Model", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"Model"}; return type_info_static; } const ::ov::DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index 549fca57e2c73a..c2bb3fef23404f 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -409,14 +409,6 @@ class OPENVINO_API Node : public std::enable_shared_from_this { /// Get all the nodes that uses the current node NodeVector get_users(bool check_is_used = false) const; - /// \return Version of this node - OPENVINO_DEPRECATED("This method is deprecated and will be removed soon.") - virtual size_t get_version() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return get_type_info().version; - OPENVINO_SUPPRESS_DEPRECATED_END - } - /// Use instance ids for comparison instead of memory addresses to improve determinism bool operator<(const Node& other) const { return m_instance_id < other.m_instance_id; diff --git a/src/core/include/openvino/core/rtti.hpp b/src/core/include/openvino/core/rtti.hpp index 505d6e687eb29b..06b541204d88c3 100644 --- a/src/core/include/openvino/core/rtti.hpp +++ b/src/core/include/openvino/core/rtti.hpp @@ -7,14 +7,14 @@ #include "openvino/core/type.hpp" #include "openvino/core/visibility.hpp" -#define _OPENVINO_RTTI_EXPAND(X) X -#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, _4, NAME, ...) NAME +#define _OPENVINO_RTTI_EXPAND(X) X +#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, NAME, ...) 
NAME #define _OPENVINO_RTTI_WITH_TYPE(TYPE_NAME) _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, "util") #define _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, VERSION_NAME) \ _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, 0, VERSION_NAME}; \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, VERSION_NAME}; \ type_info_static.hash(); \ return type_info_static; \ } \ @@ -23,19 +23,18 @@ } #define _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, 0) + _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) -#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, OLD_VERSION) \ - _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ - OLD_VERSION, \ - VERSION_NAME, \ - &PARENT_CLASS::get_type_info_static()}; \ - type_info_static.hash(); \ - return type_info_static; \ - } \ - const ::ov::DiscreteTypeInfo& get_type_info() const override { \ - return get_type_info_static(); \ +#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ + _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ + VERSION_NAME, \ + &PARENT_CLASS::get_type_info_static()}; \ + type_info_static.hash(); \ + return type_info_static; \ + } \ + const ::ov::DiscreteTypeInfo& get_type_info() const override { \ + return get_type_info_static(); \ } /// Helper macro that puts necessary declarations of RTTI block inside a class definition. @@ -92,11 +91,10 @@ /// OPENVINO_RTTI(name, version_id) /// OPENVINO_RTTI(name, version_id, parent) /// OPENVINO_RTTI(name, version_id, parent, old_version) -#define OPENVINO_RTTI(...) \ - _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION, \ +#define OPENVINO_RTTI(...) 
\ + _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_WITH_TYPE)(__VA_ARGS__)) /// Note: Please don't use this macros for new operations diff --git a/src/core/include/openvino/core/runtime_attribute.hpp b/src/core/include/openvino/core/runtime_attribute.hpp index 4aca1b00330fdb..5502c3eb8ff280 100644 --- a/src/core/include/openvino/core/runtime_attribute.hpp +++ b/src/core/include/openvino/core/runtime_attribute.hpp @@ -20,7 +20,7 @@ class Any; class OPENVINO_API RuntimeAttribute { public: _OPENVINO_HIDDEN_METHOD static const DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute"}; return type_info_static; } virtual const DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/type.hpp b/src/core/include/openvino/core/type.hpp index 6fa3e3d56f6285..6ceaa39cbe08e0 100644 --- a/src/core/include/openvino/core/type.hpp +++ b/src/core/include/openvino/core/type.hpp @@ -30,14 +30,11 @@ namespace ov { */ struct OPENVINO_API DiscreteTypeInfo { const char* name; - OPENVINO_DEPRECATED("This member was deprecated. Please use version_id instead.") - uint64_t version; const char* version_id; // A pointer to a parent type info; used for casting and inheritance traversal, not for // exact type identification const DiscreteTypeInfo* parent; - OPENVINO_SUPPRESS_DEPRECATED_START DiscreteTypeInfo() = default; DiscreteTypeInfo(const DiscreteTypeInfo&) = default; DiscreteTypeInfo(DiscreteTypeInfo&&) = default; @@ -47,29 +44,16 @@ struct OPENVINO_API DiscreteTypeInfo { const char* _version_id, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(0), version_id(_version_id), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, uint64_t _version, const DiscreteTypeInfo* _parent = nullptr) + constexpr DiscreteTypeInfo(const char* _name, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(_version), version_id(nullptr), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, - uint64_t _version, - const char* _version_id, - const DiscreteTypeInfo* _parent = nullptr) - : name(_name), - version(_version), - version_id(_version_id), - parent(_parent), - hash_value(0) {} - OPENVINO_SUPPRESS_DEPRECATED_END - bool is_castable(const DiscreteTypeInfo& target_type) const; std::string get_version() const; diff --git a/src/core/include/openvino/op/acosh.hpp b/src/core/include/openvino/op/acosh.hpp index 97224708df1f50..59a2dd45e4b06c 100644 --- a/src/core/include/openvino/op/acosh.hpp +++ b/src/core/include/openvino/op/acosh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Acosh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Acosh operation. 
Acosh() = default; diff --git a/src/core/include/openvino/op/add.hpp b/src/core/include/openvino/op/add.hpp index 0e2a812a9dc938..054ec3302d846c 100644 --- a/src/core/include/openvino/op/add.hpp +++ b/src/core/include/openvino/op/add.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Add : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation Add() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/asinh.hpp b/src/core/include/openvino/op/asinh.hpp index 8fa8eedd687dc4..96a7f02bfc7e12 100644 --- a/src/core/include/openvino/op/asinh.hpp +++ b/src/core/include/openvino/op/asinh.hpp @@ -15,7 +15,7 @@ namespace v3 { /// class OPENVINO_API Asinh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Asinh operation. Asinh() = default; diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index 7c5f2b477a573e..2ba16d46195ffc 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset3", util::AssignBase, 3); + OPENVINO_OP("Assign", "opset3", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. @@ -44,7 +44,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset6", util::AssignBase, 6); + OPENVINO_OP("Assign", "opset6", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. diff --git a/src/core/include/openvino/op/atanh.hpp b/src/core/include/openvino/op/atanh.hpp index 5fc62facb560af..4d4220b6fdcaa5 100644 --- a/src/core/include/openvino/op/atanh.hpp +++ b/src/core/include/openvino/op/atanh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Atanh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Atanh operation. Atanh() = default; diff --git a/src/core/include/openvino/op/avg_pool.hpp b/src/core/include/openvino/op/avg_pool.hpp index a69e6cc8502da0..24298c8602bbd2 100644 --- a/src/core/include/openvino/op/avg_pool.hpp +++ b/src/core/include/openvino/op/avg_pool.hpp @@ -14,7 +14,7 @@ namespace v1 { /// class OPENVINO_API AvgPool : public Op { public: - OPENVINO_OP("AvgPool", "opset1", op::Op, 1); + OPENVINO_OP("AvgPool", "opset1", op::Op); /// \brief Constructs a batched average pooling operation. 
AvgPool() = default; diff --git a/src/core/include/openvino/op/batch_norm.hpp b/src/core/include/openvino/op/batch_norm.hpp index 7131f0078d665d..3800090312cb7f 100644 --- a/src/core/include/openvino/op/batch_norm.hpp +++ b/src/core/include/openvino/op/batch_norm.hpp @@ -59,7 +59,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchNormInference : public Op { public: - OPENVINO_OP("BatchNormInference", "opset5", op::Op, 5); + OPENVINO_OP("BatchNormInference", "opset5", op::Op); BatchNormInference() = default; /// \param input [., C, ...] /// \param gamma gamma scaling for normalized value. [C] diff --git a/src/core/include/openvino/op/batch_to_space.hpp b/src/core/include/openvino/op/batch_to_space.hpp index 4d28c16d067229..6609e539087628 100644 --- a/src/core/include/openvino/op/batch_to_space.hpp +++ b/src/core/include/openvino/op/batch_to_space.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchToSpace : public Op { public: - OPENVINO_OP("BatchToSpace", "opset2", op::Op, 1); + OPENVINO_OP("BatchToSpace", "opset2", op::Op); BatchToSpace() = default; /// \brief Constructs a BatchToSpace operation. /// diff --git a/src/core/include/openvino/op/binary_convolution.hpp b/src/core/include/openvino/op/binary_convolution.hpp index 0dbb2d494bd6eb..8cdcd91e1b03a2 100644 --- a/src/core/include/openvino/op/binary_convolution.hpp +++ b/src/core/include/openvino/op/binary_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BinaryConvolution : public Op { public: - OPENVINO_OP("BinaryConvolution", "opset1", op::Op, 1); + OPENVINO_OP("BinaryConvolution", "opset1", op::Op); enum class BinaryConvolutionMode { // Interpret input data and kernel values: 0 as -1, 1 as 1 diff --git a/src/core/include/openvino/op/broadcast.hpp b/src/core/include/openvino/op/broadcast.hpp index e41c7e6c601193..fccffc5ed1d22e 100644 --- a/src/core/include/openvino/op/broadcast.hpp +++ b/src/core/include/openvino/op/broadcast.hpp @@ -17,7 +17,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase, 3); + OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. Broadcast() = default; @@ -81,7 +81,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase, 1); + OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. 
Broadcast() = default; diff --git a/src/core/include/openvino/op/bucketize.hpp b/src/core/include/openvino/op/bucketize.hpp index fa8d34579e7273..33bbfbc8003862 100644 --- a/src/core/include/openvino/op/bucketize.hpp +++ b/src/core/include/openvino/op/bucketize.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Bucketize : public Op { public: - OPENVINO_OP("Bucketize", "opset3", op::Op, 3); + OPENVINO_OP("Bucketize", "opset3", op::Op); Bucketize() = default; /// \brief Constructs a Bucketize node diff --git a/src/core/include/openvino/op/convert_like.hpp b/src/core/include/openvino/op/convert_like.hpp index 27bd89c6e9f106..2621a1ce860131 100644 --- a/src/core/include/openvino/op/convert_like.hpp +++ b/src/core/include/openvino/op/convert_like.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvertLike : public Op { public: - OPENVINO_OP("ConvertLike", "opset1", op::Op, 1); + OPENVINO_OP("ConvertLike", "opset1", op::Op); /// \brief Constructs a conversion operation. ConvertLike() = default; diff --git a/src/core/include/openvino/op/convolution.hpp b/src/core/include/openvino/op/convolution.hpp index 87c17f8167c41d..f340fbb544c8c5 100644 --- a/src/core/include/openvino/op/convolution.hpp +++ b/src/core/include/openvino/op/convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Convolution : public Op { public: - OPENVINO_OP("Convolution", "opset1", op::Op, 1); + OPENVINO_OP("Convolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. Convolution() = default; @@ -129,7 +129,7 @@ class OPENVINO_API Convolution : public Op { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvolutionBackpropData : public Op { public: - OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. 
ConvolutionBackpropData() = default; diff --git a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp index 4846f750ceaf6b..e07493a781128c 100644 --- a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp +++ b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCGreedyDecoderSeqLen : public Op { public: - OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op, 6); + OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op); CTCGreedyDecoderSeqLen() = default; /// \brief Constructs a CTCGreedyDecoderSeqLen operation /// diff --git a/src/core/include/openvino/op/ctc_loss.hpp b/src/core/include/openvino/op/ctc_loss.hpp index 21f143608fc90f..3a1077d9394e22 100644 --- a/src/core/include/openvino/op/ctc_loss.hpp +++ b/src/core/include/openvino/op/ctc_loss.hpp @@ -14,7 +14,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCLoss : public Op { public: - OPENVINO_OP("CTCLoss", "opset4", op::Op, 4); + OPENVINO_OP("CTCLoss", "opset4", op::Op); CTCLoss() = default; /// \brief Constructs a CTCLoss operation diff --git a/src/core/include/openvino/op/deformable_convolution.hpp b/src/core/include/openvino/op/deformable_convolution.hpp index 3eb673cd340ad6..993c779e5cb2bd 100644 --- a/src/core/include/openvino/op/deformable_convolution.hpp +++ b/src/core/include/openvino/op/deformable_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformableConvolution : public op::util::DeformableConvolutionBase { public: - OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase, 1); + OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase); /// \brief Constructs a conversion operation. DeformableConvolution() = default; diff --git a/src/core/include/openvino/op/deformable_psroi_pooling.hpp b/src/core/include/openvino/op/deformable_psroi_pooling.hpp index 60bc07f3dd2861..3e3315a95b93de 100644 --- a/src/core/include/openvino/op/deformable_psroi_pooling.hpp +++ b/src/core/include/openvino/op/deformable_psroi_pooling.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformablePSROIPooling : public Op { public: - OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op, 1); + OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op); DeformablePSROIPooling() = default; /// \brief Constructs a DeformablePSROIPooling operation diff --git a/src/core/include/openvino/op/dft.hpp b/src/core/include/openvino/op/dft.hpp index 6072f711650b9c..b87262fd78b919 100644 --- a/src/core/include/openvino/op/dft.hpp +++ b/src/core/include/openvino/op/dft.hpp @@ -29,7 +29,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DFT : public util::FFTBase { public: - OPENVINO_OP("DFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("DFT", "opset7", util::FFTBase); DFT() = default; /// \brief Constructs a DFT operation. DFT is performed for full size axes. 
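Note on the OPENVINO_OP edits in the op headers throughout this patch: since DiscreteTypeInfo no longer carries a numeric version, the macro now takes at most three arguments (type name, opset version string, parent class). A minimal sketch of an op declared against the new macro, using a hypothetical MyRelu purely for illustration:

    #include "openvino/op/util/unary_elementwise_arithmetic.hpp"

    namespace ov {
    namespace op {
    namespace v1 {
    // Hypothetical op, not part of this patch; only the RTTI-relevant part of the class is shown.
    class MyRelu : public util::UnaryElementwiseArithmetic {
    public:
        // name, opset version string, parent class; the old trailing numeric version argument is removed
        OPENVINO_OP("MyRelu", "opset1", util::UnaryElementwiseArithmetic);
        MyRelu() = default;
    };
    }  // namespace v1
    }  // namespace op
    }  // namespace ov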
diff --git a/src/core/include/openvino/op/divide.hpp b/src/core/include/openvino/op/divide.hpp index c4ef648683c293..4d83d0043f4a64 100644 --- a/src/core/include/openvino/op/divide.hpp +++ b/src/core/include/openvino/op/divide.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Divide : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a division operation. Divide() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/einsum.hpp b/src/core/include/openvino/op/einsum.hpp index e42cdc0a94f806..9d1c179c2c5503 100644 --- a/src/core/include/openvino/op/einsum.hpp +++ b/src/core/include/openvino/op/einsum.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Einsum : public Op { public: - OPENVINO_OP("Einsum", "opset7", op::Op, 7); + OPENVINO_OP("Einsum", "opset7", op::Op); Einsum() = default; diff --git a/src/core/include/openvino/op/embedding_segments_sum.hpp b/src/core/include/openvino/op/embedding_segments_sum.hpp index 8601bd9ac4ec12..55108f6ccb9d5c 100644 --- a/src/core/include/openvino/op/embedding_segments_sum.hpp +++ b/src/core/include/openvino/op/embedding_segments_sum.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingSegmentsSum : public Op { public: - OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op, 3); + OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op); /// \brief Constructs a EmbeddingSegmentsSum operation. EmbeddingSegmentsSum() = default; /// \brief Constructs a EmbeddingSegmentsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp index 0d88bdbd21af16..7c3ad7a7b74e7e 100644 --- a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp +++ b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagOffsetsSum : public util::EmbeddingBagOffsetsBase { public: - OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase, 3); + OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase); /// \brief Constructs a EmbeddingBagOffsetsSum operation. EmbeddingBagOffsetsSum() = default; /// \brief Constructs a EmbeddingBagOffsetsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_packedsum.hpp b/src/core/include/openvino/op/embeddingbag_packedsum.hpp index 169b44484cbeab..b095b226a9e86d 100644 --- a/src/core/include/openvino/op/embeddingbag_packedsum.hpp +++ b/src/core/include/openvino/op/embeddingbag_packedsum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagPackedSum : public util::EmbeddingBagPackedBase { public: - OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase, 3); + OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase); /// \brief Constructs a EmbeddingBagPackedSum operation. EmbeddingBagPackedSum() = default; /// \brief Constructs a EmbeddingBagPackedSum operation. 
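A similar illustration for the ov::Any map reader and writer reworked earlier in this diff: std::map values are now serialized in a brace-delimited key:value form and parsed back with nested containers taken into account. A rough sketch of the intended round trip (the variable names and the exact expected text are illustrative assumptions, not taken from the patch):

    #include <iostream>
    #include <string>
    #include "openvino/core/any.hpp"

    int main() {
        ov::AnyMap config{{"LEVEL", 7}, {"NUM_STREAMS", 2}};
        ov::Any any = config;
        // With the new Write<std::map> specialization the text form is expected to look roughly like
        // {LEVEL:7,NUM_STREAMS:2} (keys come back sorted because std::map orders them).
        const std::string text = any.as<std::string>();
        std::cout << text << std::endl;
        // The new Read<AnyMap> specialization accepts the same braced form, including nested {...} or [...] values.
        const ov::AnyMap parsed = ov::Any(text).as<ov::AnyMap>();
        return parsed.size() == config.size() ? 0 : 1;
    }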
diff --git a/src/core/include/openvino/op/equal.hpp b/src/core/include/openvino/op/equal.hpp index c8e2af9cd575b5..ae16f6c6d52703 100644 --- a/src/core/include/openvino/op/equal.hpp +++ b/src/core/include/openvino/op/equal.hpp @@ -29,7 +29,7 @@ namespace v1 { // clang-format on class OPENVINO_API Equal : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs an equal operation. Equal() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs an equal operation. diff --git a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp index c8e84bd6f09622..17221d907cb27e 100644 --- a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp +++ b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronDetectionOutput : public Op { public: - OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp index 83bf6f769c73e1..af2bfe1511f9fe 100644 --- a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp +++ b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronGenerateProposalsSingleImage : public Op { public: - OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp index 0dd697aec211a2..0865654a3dc358 100644 --- a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp +++ b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronPriorGridGenerator : public Op { public: - OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp index 109115d08abec9..b6b3d73b47ddbe 100644 --- a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp +++ b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronROIFeatureExtractor : public Op { public: - OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op, 6); + 
OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp index c12e1dcb374fc2..90bb99ebc5c89c 100644 --- a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp +++ b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronTopKROIs : public Op { public: - OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op); ExperimentalDetectronTopKROIs() = default; /// \brief Constructs a ExperimentalDetectronTopKROIs operation. diff --git a/src/core/include/openvino/op/extractimagepatches.hpp b/src/core/include/openvino/op/extractimagepatches.hpp index d9a0ca6ff6b89d..37d506fe1895b8 100644 --- a/src/core/include/openvino/op/extractimagepatches.hpp +++ b/src/core/include/openvino/op/extractimagepatches.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExtractImagePatches : public Op { public: - OPENVINO_OP("ExtractImagePatches", "opset3", op::Op, 3); + OPENVINO_OP("ExtractImagePatches", "opset3", op::Op); ExtractImagePatches() = default; /// \brief Constructs a ExtractImagePatches operation diff --git a/src/core/include/openvino/op/floor_mod.hpp b/src/core/include/openvino/op/floor_mod.hpp index 2216de28b617e4..4df54f3bcd7334 100644 --- a/src/core/include/openvino/op/floor_mod.hpp +++ b/src/core/include/openvino/op/floor_mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API FloorMod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation FloorMod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/gather.hpp b/src/core/include/openvino/op/gather.hpp index 55564080af1651..e752259271cfb4 100644 --- a/src/core/include/openvino/op/gather.hpp +++ b/src/core/include/openvino/op/gather.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset1", op::util::GatherBase, 1); + OPENVINO_OP("Gather", "opset1", op::util::GatherBase); static constexpr int64_t AXIS_NOT_SET_VALUE = std::numeric_limits::max(); Gather() = default; /// \param data The tensor from which slices are gathered @@ -33,7 +33,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset7", op::util::GatherBase, 7); + OPENVINO_OP("Gather", "opset7", op::util::GatherBase); Gather() = default; /// \param data The tensor from which slices are gathered diff --git a/src/core/include/openvino/op/gather_elements.hpp b/src/core/include/openvino/op/gather_elements.hpp index c184d86e3628b4..4d8c419e616a10 100644 --- a/src/core/include/openvino/op/gather_elements.hpp +++ b/src/core/include/openvino/op/gather_elements.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherElements : public Op { public: - OPENVINO_OP("GatherElements", "opset6", op::Op, 6); + 
OPENVINO_OP("GatherElements", "opset6", op::Op); GatherElements() = default; /// \brief Constructs a GatherElements operation. diff --git a/src/core/include/openvino/op/gather_nd.hpp b/src/core/include/openvino/op/gather_nd.hpp index 146a1511664968..59680a37d4e4c6 100644 --- a/src/core/include/openvino/op/gather_nd.hpp +++ b/src/core/include/openvino/op/gather_nd.hpp @@ -13,7 +13,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherND : public op::util::GatherNDBase { public: - OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase, 5); + OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase); GatherND() = default; /// \brief Constructs a GatherND operation. diff --git a/src/core/include/openvino/op/gather_tree.hpp b/src/core/include/openvino/op/gather_tree.hpp index 67f455be8a2aa6..bc2169019a8cea 100644 --- a/src/core/include/openvino/op/gather_tree.hpp +++ b/src/core/include/openvino/op/gather_tree.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherTree : public Op { public: - OPENVINO_OP("GatherTree", "opset1", op::Op, 1); + OPENVINO_OP("GatherTree", "opset1", op::Op); GatherTree() = default; /// \param step_ids Tensor of shape [MAX_TIME, BATCH_SIZE, BEAM_WIDTH] with diff --git a/src/core/include/openvino/op/gelu.hpp b/src/core/include/openvino/op/gelu.hpp index 021a7e0ef8bd02..ae868e3909bbfd 100644 --- a/src/core/include/openvino/op/gelu.hpp +++ b/src/core/include/openvino/op/gelu.hpp @@ -15,7 +15,7 @@ namespace v0 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic, 0); + OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic); Gelu(); /// \brief Constructs a Gelu operation. @@ -43,7 +43,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic, 7); + OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic); Gelu() = default; /// \brief Constructs a Gelu operation. diff --git a/src/core/include/openvino/op/greater.hpp b/src/core/include/openvino/op/greater.hpp index 1693c09579b1e7..de889a0acae370 100644 --- a/src/core/include/openvino/op/greater.hpp +++ b/src/core/include/openvino/op/greater.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Greater : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than operation. Greater() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than operation. diff --git a/src/core/include/openvino/op/greater_eq.hpp b/src/core/include/openvino/op/greater_eq.hpp index f4731a2da74a50..1f5fe1f984c95d 100644 --- a/src/core/include/openvino/op/greater_eq.hpp +++ b/src/core/include/openvino/op/greater_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GreaterEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than-or-equal operation. 
GreaterEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than-or-equal operation. diff --git a/src/core/include/openvino/op/group_conv.hpp b/src/core/include/openvino/op/group_conv.hpp index f01c2a7f3dc891..a37a26e480e8de 100644 --- a/src/core/include/openvino/op/group_conv.hpp +++ b/src/core/include/openvino/op/group_conv.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \brief Batched convolution operation, with optional window dilation and stride. class OPENVINO_API GroupConvolution : public Op { public: - OPENVINO_OP("GroupConvolution", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. GroupConvolution() = default; @@ -126,7 +126,7 @@ class OPENVINO_API GroupConvolution : public Op { /// \brief Data batch backprop for batched convolution operation. class OPENVINO_API GroupConvolutionBackpropData : public Op { public: - OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. GroupConvolutionBackpropData(); diff --git a/src/core/include/openvino/op/gru_cell.hpp b/src/core/include/openvino/op/gru_cell.hpp index 2610c4731ae139..15543eec2b943d 100644 --- a/src/core/include/openvino/op/gru_cell.hpp +++ b/src/core/include/openvino/op/gru_cell.hpp @@ -24,7 +24,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUCell : public util::RNNCellBase { public: - OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase, 3); + OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase); GRUCell(); /// /// \brief Constructs GRUCell node. diff --git a/src/core/include/openvino/op/gru_sequence.hpp b/src/core/include/openvino/op/gru_sequence.hpp index 1fc9e7c9147fcf..fae54509ad0c64 100644 --- a/src/core/include/openvino/op/gru_sequence.hpp +++ b/src/core/include/openvino/op/gru_sequence.hpp @@ -19,7 +19,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUSequence : public util::RNNCellBase { public: - OPENVINO_OP("GRUSequence", "opset5", op::Op, 5); + OPENVINO_OP("GRUSequence", "opset5", op::Op); GRUSequence(); GRUSequence(const Output& X, diff --git a/src/core/include/openvino/op/hsigmoid.hpp b/src/core/include/openvino/op/hsigmoid.hpp index abf8d2e1cf87a9..71b07ada902617 100644 --- a/src/core/include/openvino/op/hsigmoid.hpp +++ b/src/core/include/openvino/op/hsigmoid.hpp @@ -17,7 +17,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSigmoid : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic); HSigmoid() = default; /// \brief Constructs a HSigmoid operation. diff --git a/src/core/include/openvino/op/hswish.hpp b/src/core/include/openvino/op/hswish.hpp index b20e3498b90dd0..34cff2955b5ab6 100644 --- a/src/core/include/openvino/op/hswish.hpp +++ b/src/core/include/openvino/op/hswish.hpp @@ -17,7 +17,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSwish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic); HSwish() = default; /// \brief Constructs a HSwish (hard version of Swish) operation. 
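The recurring change in the headers above drops the trailing numeric version from OPENVINO_OP, leaving the three-argument form (type name, opset string, parent class). A minimal sketch of the resulting declaration style, using a hypothetical op rather than one from this patch:

```cpp
// Hypothetical op, not part of this patch; shows the three-argument OPENVINO_OP form
// that these headers migrate to. Only the RTTI declaration is sketched here.
#include "openvino/op/op.hpp"

namespace ov {
namespace op {
namespace v1 {

class MyEltwiseOp : public Op {
public:
    // Before this change: OPENVINO_OP("MyEltwiseOp", "opset1", op::Op, 1);
    OPENVINO_OP("MyEltwiseOp", "opset1", op::Op);

    MyEltwiseOp() = default;
};

}  // namespace v1
}  // namespace op
}  // namespace ov
```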
diff --git a/src/core/include/openvino/op/idft.hpp b/src/core/include/openvino/op/idft.hpp index ea6b0a737a44ba..1fd0948bc9c8a2 100644 --- a/src/core/include/openvino/op/idft.hpp +++ b/src/core/include/openvino/op/idft.hpp @@ -17,7 +17,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API IDFT : public util::FFTBase { public: - OPENVINO_OP("IDFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("IDFT", "opset7", util::FFTBase); IDFT() = default; /// \brief Constructs a IDFT operation. IDFT is performed for full size axes. diff --git a/src/core/include/openvino/op/interpolate.hpp b/src/core/include/openvino/op/interpolate.hpp index cec3a88e3f5f3f..d6e32cc28cb3c2 100644 --- a/src/core/include/openvino/op/interpolate.hpp +++ b/src/core/include/openvino/op/interpolate.hpp @@ -83,7 +83,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase, 4); + OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. @@ -190,7 +190,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase, 11); + OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. /// diff --git a/src/core/include/openvino/op/less.hpp b/src/core/include/openvino/op/less.hpp index d11285b871e392..3d87ab9b6ffbe3 100644 --- a/src/core/include/openvino/op/less.hpp +++ b/src/core/include/openvino/op/less.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Less : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than operation. Less() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a less-than operation. diff --git a/src/core/include/openvino/op/less_eq.hpp b/src/core/include/openvino/op/less_eq.hpp index 6b725f749c23ae..111f4c07140af5 100644 --- a/src/core/include/openvino/op/less_eq.hpp +++ b/src/core/include/openvino/op/less_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LessEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than-or-equal operation. LessEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/log_softmax.hpp b/src/core/include/openvino/op/log_softmax.hpp index d441e645998aea..b4bca830a0832f 100644 --- a/src/core/include/openvino/op/log_softmax.hpp +++ b/src/core/include/openvino/op/log_softmax.hpp @@ -14,7 +14,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogSoftmax : public Op { public: - OPENVINO_OP("LogSoftmax", "opset5", op::Op, 5); + OPENVINO_OP("LogSoftmax", "opset5", op::Op); LogSoftmax() = default; /// \brief Constructs a LogSoftmax operation. 
/// diff --git a/src/core/include/openvino/op/logical_and.hpp b/src/core/include/openvino/op/logical_and.hpp index 8580accfe46df7..6d55f8f3585e0f 100644 --- a/src/core/include/openvino/op/logical_and.hpp +++ b/src/core/include/openvino/op/logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalAnd : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical); /// \brief Constructs a logical-and operation. LogicalAnd() = default; diff --git a/src/core/include/openvino/op/logical_not.hpp b/src/core/include/openvino/op/logical_not.hpp index 9b50a8e2ecce1e..c5421b8db14a47 100644 --- a/src/core/include/openvino/op/logical_not.hpp +++ b/src/core/include/openvino/op/logical_not.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalNot : public Op { public: - OPENVINO_OP("LogicalNot", "opset1", op::Op, 1); + OPENVINO_OP("LogicalNot", "opset1", op::Op); /// \brief Constructs a logical negation operation. LogicalNot() = default; /// \brief Constructs a logical negation operation. diff --git a/src/core/include/openvino/op/logical_or.hpp b/src/core/include/openvino/op/logical_or.hpp index 379b773d37617f..15c00eea04baf3 100644 --- a/src/core/include/openvino/op/logical_or.hpp +++ b/src/core/include/openvino/op/logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalOr : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical); LogicalOr() = default; /// \brief Constructs a logical-or operation. /// diff --git a/src/core/include/openvino/op/logical_xor.hpp b/src/core/include/openvino/op/logical_xor.hpp index 9e94a1756f98c0..41ad89abca2638 100644 --- a/src/core/include/openvino/op/logical_xor.hpp +++ b/src/core/include/openvino/op/logical_xor.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalXor : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical); LogicalXor() = default; /// \brief Constructs a logical-xor operation. /// diff --git a/src/core/include/openvino/op/loop.hpp b/src/core/include/openvino/op/loop.hpp index 7bbc00dc75c7fb..cb174d588b7bc3 100644 --- a/src/core/include/openvino/op/loop.hpp +++ b/src/core/include/openvino/op/loop.hpp @@ -31,7 +31,7 @@ class OPENVINO_API Loop : public op::util::SubGraphOp { int64_t body_condition_output_idx = -1; }; - OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp, 5); + OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp); /// \brief Constructs a Loop operation. 
Loop() = default; diff --git a/src/core/include/openvino/op/lstm_cell.hpp b/src/core/include/openvino/op/lstm_cell.hpp index 3c9e53be10c46b..249b3dccdc2bf6 100644 --- a/src/core/include/openvino/op/lstm_cell.hpp +++ b/src/core/include/openvino/op/lstm_cell.hpp @@ -278,7 +278,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMCell : public util::RNNCellBase { public: - OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase, 4); + OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase); LSTMCell(); /// diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 3296bd54208134..1e7599a35ba982 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -127,7 +127,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMSequence : public util::RNNCellBase { public: - OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase, 5); + OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase); LSTMSequence() = default; using direction = RecurrentSequenceDirection; diff --git a/src/core/include/openvino/op/max_pool.hpp b/src/core/include/openvino/op/max_pool.hpp index 2acfdb9d12b6ce..c1741eef6cb717 100644 --- a/src/core/include/openvino/op/max_pool.hpp +++ b/src/core/include/openvino/op/max_pool.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MaxPool : public op::util::MaxPoolBase { public: - OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase, 1); + OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase); /// \brief Constructs a batched max pooling operation. MaxPool() = default; diff --git a/src/core/include/openvino/op/maximum.hpp b/src/core/include/openvino/op/maximum.hpp index 5c21463c2ec727..742878b09c4eba 100644 --- a/src/core/include/openvino/op/maximum.hpp +++ b/src/core/include/openvino/op/maximum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Maximum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a maximum operation. Maximum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index afd75c41ea577b..c8cfc5c9d7c999 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a minimum operation. Minimum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mish.hpp b/src/core/include/openvino/op/mish.hpp index 1d3a53ca7534c4..455bd713166f95 100644 --- a/src/core/include/openvino/op/mish.hpp +++ b/src/core/include/openvino/op/mish.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic); Mish() = default; /// \brief Constructs an Mish operation. 
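With the numeric version gone, code that needs to know which opset an op belongs to can read the version_id string from its type info (the same field Node::write_description switches to further down in this patch). A hedged sketch; the helper name is ours:

```cpp
// Identify a node's opset through the version_id string carried by its
// DiscreteTypeInfo instead of the removed numeric version.
#include <memory>
#include <string>

#include "openvino/core/node.hpp"

bool declared_in_opset(const std::shared_ptr<ov::Node>& node, const std::string& opset) {
    const auto& info = node->get_type_info();
    return info.version_id != nullptr && opset == info.version_id;
}
```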
diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 749a7ae2b7e5d4..5e58a2ec03d733 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a Mod node. Mod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/multiply.hpp b/src/core/include/openvino/op/multiply.hpp index 259c0b9f03a117..2e2f3bd4c73000 100644 --- a/src/core/include/openvino/op/multiply.hpp +++ b/src/core/include/openvino/op/multiply.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Multiply : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a multiplication operation. Multiply() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mvn.hpp b/src/core/include/openvino/op/mvn.hpp index c9a3920aedbb4b..7f198ec1444047 100644 --- a/src/core/include/openvino/op/mvn.hpp +++ b/src/core/include/openvino/op/mvn.hpp @@ -99,7 +99,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MVN : public Op { public: - OPENVINO_OP("MVN", "opset6", op::Op, 6); + OPENVINO_OP("MVN", "opset6", op::Op); MVN() = default; /// \brief Constructs an MVN operation. diff --git a/src/core/include/openvino/op/non_max_suppression.hpp b/src/core/include/openvino/op/non_max_suppression.hpp index 143df579e0ac69..5216c53700684f 100644 --- a/src/core/include/openvino/op/non_max_suppression.hpp +++ b/src/core/include/openvino/op/non_max_suppression.hpp @@ -16,7 +16,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset1", op::Op, 1); + OPENVINO_OP("NonMaxSuppression", "opset1", op::Op); NonMaxSuppression() = default; @@ -86,7 +86,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset3", op::Op, 3); + OPENVINO_OP("NonMaxSuppression", "opset3", op::Op); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. @@ -166,7 +166,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public op::v3::NonMaxSuppression { public: - OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression, 4); + OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. 
@@ -217,7 +217,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset5", op::Op, 5); + OPENVINO_OP("NonMaxSuppression", "opset5", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; @@ -365,7 +365,7 @@ namespace v9 { /// class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset9", op::Op, 9); + OPENVINO_OP("NonMaxSuppression", "opset9", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; diff --git a/src/core/include/openvino/op/non_zero.hpp b/src/core/include/openvino/op/non_zero.hpp index f45ae824a47cd5..e14d757e5ff341 100644 --- a/src/core/include/openvino/op/non_zero.hpp +++ b/src/core/include/openvino/op/non_zero.hpp @@ -20,7 +20,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonZero : public Op { public: - OPENVINO_OP("NonZero", "opset3", op::Op, 3); + OPENVINO_OP("NonZero", "opset3", op::Op); /// \brief Constructs a NonZero operation. NonZero() = default; /// \brief Constructs a NonZero operation. diff --git a/src/core/include/openvino/op/not_equal.hpp b/src/core/include/openvino/op/not_equal.hpp index 930244094d37a6..dfae8b59a8fd0e 100644 --- a/src/core/include/openvino/op/not_equal.hpp +++ b/src/core/include/openvino/op/not_equal.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NotEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a not-equal operation. NotEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a not-equal operation. diff --git a/src/core/include/openvino/op/one_hot.hpp b/src/core/include/openvino/op/one_hot.hpp index e911d838a52baa..621fd8483c0649 100644 --- a/src/core/include/openvino/op/one_hot.hpp +++ b/src/core/include/openvino/op/one_hot.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API OneHot : public Op { public: - OPENVINO_OP("OneHot", "opset1", op::Op, 1); + OPENVINO_OP("OneHot", "opset1", op::Op); /// \brief Constructs a one-hot operation. OneHot() = default; diff --git a/src/core/include/openvino/op/op.hpp b/src/core/include/openvino/op/op.hpp index c0a57a90323660..7964007180775f 100644 --- a/src/core/include/openvino/op/op.hpp +++ b/src/core/include/openvino/op/op.hpp @@ -16,7 +16,6 @@ #define OPENVINO_OP(...) 
\ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ _OPENVINO_RTTI_OP_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_OP_WITH_TYPE)(__VA_ARGS__)) \ @@ -40,7 +39,7 @@ class OPENVINO_API Op : public Node { public: _OPENVINO_HIDDEN_METHOD static const ::ov::Node::type_info_t& get_type_info_static() { - static ::ov::Node::type_info_t info{"Op", 0, "util"}; + static ::ov::Node::type_info_t info{"Op", "util"}; info.hash(); return info; } diff --git a/src/core/include/openvino/op/pad.hpp b/src/core/include/openvino/op/pad.hpp index 4f8779f6d16b4b..a45c1f33dd52ef 100644 --- a/src/core/include/openvino/op/pad.hpp +++ b/src/core/include/openvino/op/pad.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Pad : public Op { public: - OPENVINO_OP("Pad", "opset1", op::Op, 1); + OPENVINO_OP("Pad", "opset1", op::Op); /// \brief Constructs a generic padding operation. /// diff --git a/src/core/include/openvino/op/power.hpp b/src/core/include/openvino/op/power.hpp index c2bb08f4154bcf..c89a98c61a203b 100644 --- a/src/core/include/openvino/op/power.hpp +++ b/src/core/include/openvino/op/power.hpp @@ -28,7 +28,7 @@ namespace v1 { // clang-format on class OPENVINO_API Power : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic); Power() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/proposal.hpp b/src/core/include/openvino/op/proposal.hpp index 6f7960133d3171..c09282594d3bcb 100644 --- a/src/core/include/openvino/op/proposal.hpp +++ b/src/core/include/openvino/op/proposal.hpp @@ -78,7 +78,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Proposal : public op::v0::Proposal { public: - OPENVINO_OP("Proposal", "opset4", op::Op, 4); + OPENVINO_OP("Proposal", "opset4", op::Op); Proposal() = default; /// \brief Constructs a Proposal operation /// diff --git a/src/core/include/openvino/op/range.hpp b/src/core/include/openvino/op/range.hpp index 1cd44aed49f2de..1222d14874dba0 100644 --- a/src/core/include/openvino/op/range.hpp +++ b/src/core/include/openvino/op/range.hpp @@ -13,7 +13,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Range : public Op { public: - OPENVINO_OP("Range", "opset4", op::Op, 4); + OPENVINO_OP("Range", "opset4", op::Op); /// \brief Constructs an unitialized range operation. Range() = default; diff --git a/src/core/include/openvino/op/read_value.hpp b/src/core/include/openvino/op/read_value.hpp index 87a861b4a1e3bf..38c539427b0c4c 100644 --- a/src/core/include/openvino/op/read_value.hpp +++ b/src/core/include/openvino/op/read_value.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase, 3); + OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. @@ -45,7 +45,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase, 6); + OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. 
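The op.hpp hunk above is the mechanism behind all of these header edits: OPENVINO_OP dispatches on argument count, and removing one selector slot retires the old four-argument variant. A stand-alone illustration of that preprocessor idiom, with made-up macro names rather than the real internal ones:

```cpp
// Argument-count dispatch: SELECT yields the token sitting in the 4th slot after the
// caller's arguments, so 1, 2 or 3 arguments resolve to DECL_1, DECL_2 or DECL_3.
// Removing a slot (as the patch does for the "with version" variant) removes that arity.
#include <cstdio>

#define DECL_1(a) std::puts("one arg: " a)
#define DECL_2(a, b) std::puts("two args: " a " / " b)
#define DECL_3(a, b, c) std::puts("three args: " a " / " b " / " c)
#define SELECT(_1, _2, _3, NAME, ...) NAME
#define DECL(...) SELECT(__VA_ARGS__, DECL_3, DECL_2, DECL_1)(__VA_ARGS__)

int main() {
    DECL("Gelu");
    DECL("Gelu", "opset7");
    DECL("Gelu", "opset7", "UnaryElementwiseArithmetic");
    return 0;
}
```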
diff --git a/src/core/include/openvino/op/reduce_l1.hpp b/src/core/include/openvino/op/reduce_l1.hpp index a9f5024c6aaf06..4889e5c52a6aba 100644 --- a/src/core/include/openvino/op/reduce_l1.hpp +++ b/src/core/include/openvino/op/reduce_l1.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL1 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L1-norm operation. ReduceL1() = default; /// \brief Constructs a reduce L1-norm operation. diff --git a/src/core/include/openvino/op/reduce_l2.hpp b/src/core/include/openvino/op/reduce_l2.hpp index c2a18ac3668483..9f9b38b7dc5747 100644 --- a/src/core/include/openvino/op/reduce_l2.hpp +++ b/src/core/include/openvino/op/reduce_l2.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL2 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L2-norm operation. ReduceL2() = default; /// \brief Constructs a reduce L2-norm operation. diff --git a/src/core/include/openvino/op/reduce_logical_and.hpp b/src/core/include/openvino/op/reduce_logical_and.hpp index b7e839ab1069da..1358702a1fd39a 100644 --- a/src/core/include/openvino/op/reduce_logical_and.hpp +++ b/src/core/include/openvino/op/reduce_logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalAnd : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims); ReduceLogicalAnd() = default; /// \brief Constructs a ReduceLogicalAnd node. /// diff --git a/src/core/include/openvino/op/reduce_logical_or.hpp b/src/core/include/openvino/op/reduce_logical_or.hpp index 67fe065db4585b..36a3fd34759b24 100644 --- a/src/core/include/openvino/op/reduce_logical_or.hpp +++ b/src/core/include/openvino/op/reduce_logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalOr : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims); ReduceLogicalOr() = default; /// \brief Constructs a ReduceLogicalOr node. /// diff --git a/src/core/include/openvino/op/reduce_max.hpp b/src/core/include/openvino/op/reduce_max.hpp index b1579ad5ccbf03..499dec82bb9f77 100644 --- a/src/core/include/openvino/op/reduce_max.hpp +++ b/src/core/include/openvino/op/reduce_max.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMax : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMax() = default; /// \brief Constructs a summation operation. 
diff --git a/src/core/include/openvino/op/reduce_mean.hpp b/src/core/include/openvino/op/reduce_mean.hpp index 41459857e081da..7b50dd57b7dafc 100644 --- a/src/core/include/openvino/op/reduce_mean.hpp +++ b/src/core/include/openvino/op/reduce_mean.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMean : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims); ReduceMean() = default; /// \param arg The tensor to be summed. diff --git a/src/core/include/openvino/op/reduce_min.hpp b/src/core/include/openvino/op/reduce_min.hpp index 464b232ed5fbf9..830021a0bb2ae0 100644 --- a/src/core/include/openvino/op/reduce_min.hpp +++ b/src/core/include/openvino/op/reduce_min.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMin : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMin() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reduce_prod.hpp b/src/core/include/openvino/op/reduce_prod.hpp index c8697c81bcdc2a..4a9af6339b6797 100644 --- a/src/core/include/openvino/op/reduce_prod.hpp +++ b/src/core/include/openvino/op/reduce_prod.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceProd : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a product reduction operation. ReduceProd() = default; /// \brief Constructs a product reduction operation. diff --git a/src/core/include/openvino/op/reduce_sum.hpp b/src/core/include/openvino/op/reduce_sum.hpp index 60622fd2b7e3ea..7a3221c68e52ef 100644 --- a/src/core/include/openvino/op/reduce_sum.hpp +++ b/src/core/include/openvino/op/reduce_sum.hpp @@ -61,7 +61,7 @@ namespace v1 { // clang-format on class OPENVINO_API ReduceSum : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceSum() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reshape.hpp b/src/core/include/openvino/op/reshape.hpp index 2905e4ad5ec182..9d4ecc18da1cc0 100644 --- a/src/core/include/openvino/op/reshape.hpp +++ b/src/core/include/openvino/op/reshape.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reshape : public Op { public: - OPENVINO_OP("Reshape", "opset1", op::Op, 1); + OPENVINO_OP("Reshape", "opset1", op::Op); Reshape() = default; /// \brief Constructs a dynamic reshape operation. This operation does not perform /// transpose. 
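Only the RTTI declarations change in these hunks; constructor signatures are untouched. For example, building the v1::Reshape from the hunk above still looks like this (illustrative snippet, not code from the patch):

```cpp
#include <memory>

#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reshape.hpp"

// Flattens a {2, 3, 4} input to {2, 12}: pattern value 0 keeps the first dimension
// (special_zero = true) and -1 infers the remaining size.
std::shared_ptr<ov::op::v1::Reshape> make_flatten() {
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 3, 4});
    auto pattern = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {0, -1});
    return std::make_shared<ov::op::v1::Reshape>(data, pattern, /*special_zero=*/true);
}
```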
diff --git a/src/core/include/openvino/op/reverse.hpp b/src/core/include/openvino/op/reverse.hpp index 37266573f7232d..7b1a904aacf201 100644 --- a/src/core/include/openvino/op/reverse.hpp +++ b/src/core/include/openvino/op/reverse.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reverse : public Op { public: - OPENVINO_OP("Reverse", "opset1", op::Op, 1); + OPENVINO_OP("Reverse", "opset1", op::Op); enum class Mode { INDEX, MASK }; diff --git a/src/core/include/openvino/op/rnn_sequence.hpp b/src/core/include/openvino/op/rnn_sequence.hpp index 44eaf247281a6a..0ee5543687422f 100644 --- a/src/core/include/openvino/op/rnn_sequence.hpp +++ b/src/core/include/openvino/op/rnn_sequence.hpp @@ -18,7 +18,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API RNNSequence : public util::RNNCellBase { public: - OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase, 4); + OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase); RNNSequence(); diff --git a/src/core/include/openvino/op/roi_align.hpp b/src/core/include/openvino/op/roi_align.hpp index 2922a7bc001df3..ba1ceefe09d3ed 100644 --- a/src/core/include/openvino/op/roi_align.hpp +++ b/src/core/include/openvino/op/roi_align.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ROIAlign : public Op { public: - OPENVINO_OP("ROIAlign", "opset3", op::Op, 3); + OPENVINO_OP("ROIAlign", "opset3", op::Op); enum class PoolingMode { AVG, MAX }; ROIAlign() = default; diff --git a/src/core/include/openvino/op/roi_pooling.hpp b/src/core/include/openvino/op/roi_pooling.hpp index b0b04648d7b3ea..57799954a7641f 100644 --- a/src/core/include/openvino/op/roi_pooling.hpp +++ b/src/core/include/openvino/op/roi_pooling.hpp @@ -34,12 +34,30 @@ class OPENVINO_API ROIPooling : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + /// \brief Set the output ROI feature map (pooled_h, pooled_w). + /// \param output_size Shape with pooling attributes pooled_h and pooled_w sizes. + void set_output_roi(Shape output_size); + + /// \brief Get the output ROI feature map shape (H x W) + /// \return Shape with pooled_h and pooled_w attributes. + const Shape& get_output_roi() const; + + OPENVINO_DEPRECATED("Use 'get_output_roi' instead. Use of this member can be ambiguous with Node base " + "'get_output_size' which return number of outputs.") const Shape& get_output_size() const { return m_output_size; } + + /// \brief Set the spatial scale value. + /// \param scale Scale value to set. + void set_spatial_scale(float scale); float get_spatial_scale() const { return m_spatial_scale; } + + /// \brief Set the method of pooling + /// \param method_name Pooling method name. 
+ void set_method(std::string method_name); const std::string& get_method() const { return m_method; } @@ -47,7 +65,7 @@ class OPENVINO_API ROIPooling : public Op { private: Shape m_output_size{0, 0}; - float m_spatial_scale{0}; + float m_spatial_scale{0.0f}; std::string m_method = "max"; }; } // namespace v0 diff --git a/src/core/include/openvino/op/roll.hpp b/src/core/include/openvino/op/roll.hpp index 35344dffbad668..844a39b19cfd68 100644 --- a/src/core/include/openvino/op/roll.hpp +++ b/src/core/include/openvino/op/roll.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Roll : public Op { public: - OPENVINO_OP("Roll", "opset7", op::Op, 7); + OPENVINO_OP("Roll", "opset7", op::Op); Roll() = default; diff --git a/src/core/include/openvino/op/round.hpp b/src/core/include/openvino/op/round.hpp index 994b5507ca944c..c63cee3738464a 100644 --- a/src/core/include/openvino/op/round.hpp +++ b/src/core/include/openvino/op/round.hpp @@ -18,7 +18,7 @@ namespace v5 { class OPENVINO_API Round : public util::UnaryElementwiseArithmetic { public: enum class RoundMode { HALF_TO_EVEN, HALF_AWAY_FROM_ZERO }; - OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic); /// \brief Constructs a round operation. Round() = default; diff --git a/src/core/include/openvino/op/scatter_elements_update.hpp b/src/core/include/openvino/op/scatter_elements_update.hpp index 903b1fb9bab0cc..4172e99afc50df 100644 --- a/src/core/include/openvino/op/scatter_elements_update.hpp +++ b/src/core/include/openvino/op/scatter_elements_update.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterElementsUpdate : public Op { public: - OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op, 3); + OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op); ScatterElementsUpdate() = default; /// \brief Constructs a ScatterElementsUpdate node diff --git a/src/core/include/openvino/op/scatter_nd_update.hpp b/src/core/include/openvino/op/scatter_nd_update.hpp index 1c7ac4355e1009..ab28cd374dfc6b 100644 --- a/src/core/include/openvino/op/scatter_nd_update.hpp +++ b/src/core/include/openvino/op/scatter_nd_update.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterNDUpdate : public util::ScatterNDBase { public: - OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase, 3); + OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase); ScatterNDUpdate() = default; /// \param inputs Tensor /// \param indices Index tensor: Data type must be `element::i32` or `element::i64` diff --git a/src/core/include/openvino/op/scatter_update.hpp b/src/core/include/openvino/op/scatter_update.hpp index 66936cf2f73654..4e29bf9ab2ac2a 100644 --- a/src/core/include/openvino/op/scatter_update.hpp +++ b/src/core/include/openvino/op/scatter_update.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterUpdate : public util::ScatterBase { public: - OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase, 3); + OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase); ScatterUpdate() = default; /// /// \brief Constructs ScatterUpdate operator object. 
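Returning to the roi_pooling.hpp hunk above: set_output_roi/get_output_roi replace the ambiguous get_output_size (which collides with Node::get_output_size), and setters are added for the scale and method attributes. A hedged usage sketch with made-up values and node wiring:

```cpp
#include <memory>

#include "openvino/op/parameter.hpp"
#include "openvino/op/roi_pooling.hpp"

std::shared_ptr<ov::op::v0::ROIPooling> make_roi_pooling() {
    auto feat = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 256, 14, 14});
    auto rois = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{100, 5});
    auto roi = std::make_shared<ov::op::v0::ROIPooling>(feat, rois, ov::Shape{6, 6}, 0.0625f, "max");

    roi->set_output_roi(ov::Shape{7, 7});  // preferred over the now-deprecated get_output_size() pair
    roi->set_spatial_scale(1.0f / 16.0f);
    roi->set_method("bilinear");
    roi->validate_and_infer_types();       // re-run shape inference after changing attributes
    return roi;
}
```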
diff --git a/src/core/include/openvino/op/select.hpp b/src/core/include/openvino/op/select.hpp index 5d2bec70d6779f..78e8e802ab94d3 100644 --- a/src/core/include/openvino/op/select.hpp +++ b/src/core/include/openvino/op/select.hpp @@ -30,7 +30,7 @@ namespace v1 { // clang-format on class OPENVINO_API Select : public Op { public: - OPENVINO_OP("Select", "opset1", op::Op, 1); + OPENVINO_OP("Select", "opset1", op::Op); /// \brief Constructs a selection operation. Select() : m_auto_broadcast(AutoBroadcastSpec(AutoBroadcastType::NUMPY)) {} diff --git a/src/core/include/openvino/op/shape_of.hpp b/src/core/include/openvino/op/shape_of.hpp index 5e2cace77f9d51..e7ec34c1c87d86 100644 --- a/src/core/include/openvino/op/shape_of.hpp +++ b/src/core/include/openvino/op/shape_of.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ShapeOf : public util::ShapeOfBase { public: - OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase, 3); + OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase); ShapeOf() = default; /// \brief Constructs a shape-of operation. ShapeOf(const Output& arg, const element::Type output_type = element::i64); diff --git a/src/core/include/openvino/op/softmax.hpp b/src/core/include/openvino/op/softmax.hpp index fc9c414df5af02..8a43c6dae7bdef 100644 --- a/src/core/include/openvino/op/softmax.hpp +++ b/src/core/include/openvino/op/softmax.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Softmax : public Op { public: - OPENVINO_OP("Softmax", "opset1", op::Op, 1); + OPENVINO_OP("Softmax", "opset1", op::Op); Softmax() = default; /// \brief Constructs a softmax operation. diff --git a/src/core/include/openvino/op/softplus.hpp b/src/core/include/openvino/op/softplus.hpp index d3358268ac326c..aaff04caa53471 100644 --- a/src/core/include/openvino/op/softplus.hpp +++ b/src/core/include/openvino/op/softplus.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SoftPlus : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic); SoftPlus() = default; /// \brief Constructs an SoftPlus operation. diff --git a/src/core/include/openvino/op/space_to_batch.hpp b/src/core/include/openvino/op/space_to_batch.hpp index ceaac33345b2c3..83d47b96ba4c45 100644 --- a/src/core/include/openvino/op/space_to_batch.hpp +++ b/src/core/include/openvino/op/space_to_batch.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SpaceToBatch : public Op { public: - OPENVINO_OP("SpaceToBatch", "opset2", op::Op, 1); + OPENVINO_OP("SpaceToBatch", "opset2", op::Op); SpaceToBatch() = default; diff --git a/src/core/include/openvino/op/split.hpp b/src/core/include/openvino/op/split.hpp index 6a2b70434af549..918457c0d84a05 100644 --- a/src/core/include/openvino/op/split.hpp +++ b/src/core/include/openvino/op/split.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Split : public Op { public: - OPENVINO_OP("Split", "opset1", op::Op, 1); + OPENVINO_OP("Split", "opset1", op::Op); /// \brief Constructs a split operation. 
Split() = default; diff --git a/src/core/include/openvino/op/strided_slice.hpp b/src/core/include/openvino/op/strided_slice.hpp index f52b095424c196..5535e2925800c9 100644 --- a/src/core/include/openvino/op/strided_slice.hpp +++ b/src/core/include/openvino/op/strided_slice.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API StridedSlice : public Op { public: - OPENVINO_OP("StridedSlice", "opset1", op::Op, 1); + OPENVINO_OP("StridedSlice", "opset1", op::Op); StridedSlice() = default; diff --git a/src/core/include/openvino/op/subtract.hpp b/src/core/include/openvino/op/subtract.hpp index 3c129cb1f9b233..5fd58da3bd6ff5 100644 --- a/src/core/include/openvino/op/subtract.hpp +++ b/src/core/include/openvino/op/subtract.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Subtract : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic); Subtract() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/swish.hpp b/src/core/include/openvino/op/swish.hpp index 1395e62e409522..bc9935d7f3e88f 100644 --- a/src/core/include/openvino/op/swish.hpp +++ b/src/core/include/openvino/op/swish.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Swish : public Op { public: - OPENVINO_OP("Swish", "opset4", op::Op, 4); + OPENVINO_OP("Swish", "opset4", op::Op); Swish() = default; /// \brief Constructs an Swish operation. diff --git a/src/core/include/openvino/op/topk.hpp b/src/core/include/openvino/op/topk.hpp index 2af15c7baefb92..9c2ec7a9ce1492 100644 --- a/src/core/include/openvino/op/topk.hpp +++ b/src/core/include/openvino/op/topk.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset1", op::util::TopKBase, 1); + OPENVINO_OP("TopK", "opset1", op::util::TopKBase); using SortType = TopKSortType; using Mode = TopKMode; @@ -69,7 +69,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset3", op::util::TopKBase, 3); + OPENVINO_OP("TopK", "opset3", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. @@ -111,7 +111,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset11", op::util::TopKBase, 11); + OPENVINO_OP("TopK", "opset11", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. 
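The API replacements in this patch (ROIPooling::get_output_size above, the VariableValue accessors below) keep the old member but wrap it in OPENVINO_DEPRECATED, so existing callers keep compiling while being pointed at the new name. A stand-alone sketch of the pattern with a hypothetical class:

```cpp
#include "openvino/core/deprecated.hpp"

// Hypothetical class, only to illustrate the deprecation pattern used in this patch.
class WidgetState {
public:
    OPENVINO_DEPRECATED("Use get_state() instead.")
    int get_value() const {
        return m_state;  // old accessor kept for source compatibility, warns on use
    }

    int get_state() const {
        return m_state;  // replacement accessor
    }

private:
    int m_state = 0;
};
```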
diff --git a/src/core/include/openvino/op/transpose.hpp b/src/core/include/openvino/op/transpose.hpp index 341906128d3273..2b4af853893270 100644 --- a/src/core/include/openvino/op/transpose.hpp +++ b/src/core/include/openvino/op/transpose.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Transpose : public Op { public: - OPENVINO_OP("Transpose", "opset1", op::Op, 1); + OPENVINO_OP("Transpose", "opset1", op::Op); Transpose() = default; /// diff --git a/src/core/include/openvino/op/util/variable_value.hpp b/src/core/include/openvino/op/util/variable_value.hpp index a62f9265f280fe..f4a1f5fb972db7 100644 --- a/src/core/include/openvino/op/util/variable_value.hpp +++ b/src/core/include/openvino/op/util/variable_value.hpp @@ -8,6 +8,7 @@ #include "ngraph/runtime/host_tensor.hpp" #include "openvino/core/core_visibility.hpp" +#include "openvino/core/deprecated.hpp" namespace ov { namespace op { @@ -18,42 +19,69 @@ class OPENVINO_API VariableValue { public: using Ptr = std::shared_ptr; /// \brief Constructs an uninitialized VariableValue. - VariableValue() = default; + VariableValue(); /// \brief Constructor for VariableValue. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead /// \param value The data for Variable. - explicit VariableValue(ngraph::HostTensorPtr value) : m_value(std::move(value)) {} + OPENVINO_DEPRECATED( + "This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor instead.") + explicit VariableValue(ngraph::HostTensorPtr value); /// \brief Constructor for VariableValue. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead /// \param value Data for Variable. /// \param reset The current state of the reset flag. - VariableValue(ngraph::HostTensorPtr value, bool reset) : m_reset(reset), m_value(std::move(value)) {} + OPENVINO_DEPRECATED( + "This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor instead.") + VariableValue(ngraph::HostTensorPtr value, bool reset); + + /// \brief Returns the current stored data. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead + OPENVINO_DEPRECATED("This method is deprecated and will be removed in 2024.0 release. Please get_state() instead.") + ngraph::HostTensorPtr get_value() const; + + /// \brief Sets new values for Variable. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead + /// \param value New data for Variable. + OPENVINO_DEPRECATED( + "This method is deprecated and will be removed in 2024.0 release. Please use set_state() instead.") + void set_value(const ngraph::HostTensorPtr& value); /// \brief Sets the reset flag to a new state. /// \param reset The new state of the reset flag. - void set_reset(bool reset) { - m_reset = reset; - } + void set_reset(bool reset); /// \brief Returns the current reset flag state. - bool get_reset() const { - return m_reset; - } + bool get_reset() const; + + explicit VariableValue(const ov::Tensor& value); + + /// \brief Constructor for VariableValue. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead + /// \param value Data for Variable. + /// \param reset The current state of the reset flag. 
+ VariableValue(const ov::Tensor& value, bool reset); /// \brief Returns the current stored data. - const ngraph::HostTensorPtr& get_value() const { - return m_value; - } + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead + const ov::Tensor& get_state() const; /// \brief Sets new values for Variable. + /// \deprecated This method is deprecated and will be removed in 2024.0 release. Please use method with ov::Tensor + /// instead /// \param value New data for Variable. - void set_value(const ngraph::HostTensorPtr& value) { - m_value = value; - } + void set_state(const ov::Tensor& value); private: bool m_reset = true; - ngraph::HostTensorPtr m_value; + ov::Tensor m_value; }; } // namespace util } // namespace op diff --git a/src/core/include/openvino/op/variadic_split.hpp b/src/core/include/openvino/op/variadic_split.hpp index 8c5034cf031cc9..2d6f751d48d3ba 100644 --- a/src/core/include/openvino/op/variadic_split.hpp +++ b/src/core/include/openvino/op/variadic_split.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API VariadicSplit : public Op { public: - OPENVINO_OP("VariadicSplit", "opset1", op::Op, 1); + OPENVINO_OP("VariadicSplit", "opset1", op::Op); /// \brief Constructs a variadic split operation. VariadicSplit() = default; diff --git a/src/core/include/openvino/runtime/tensor.hpp b/src/core/include/openvino/runtime/tensor.hpp index 66526a9b04767f..a43f99fe3a655f 100644 --- a/src/core/include/openvino/runtime/tensor.hpp +++ b/src/core/include/openvino/runtime/tensor.hpp @@ -35,6 +35,12 @@ class IVariableStateInternalWrapper; class ITensor; class RemoteTensor; +namespace op { +namespace util { +class VariableValue; +} +} // namespace op + /** * @brief Tensor API holding host memory * It can throw exceptions safely for the application, where it is properly handled. @@ -64,6 +70,7 @@ class OPENVINO_API Tensor { friend class ov::IVariableStateInternalWrapper; friend class InferenceEngine::IAsyncInferRequestWrapper; friend class InferenceEngine::IVariableStateWrapper; + friend class ov::op::util::VariableValue; public: /// @brief Default constructor diff --git a/src/core/shape_inference/include/roi_pooling_shape_inference.hpp b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp new file mode 100644 index 00000000000000..1568ce3cbe960c --- /dev/null +++ b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "compare.hpp" +#include "dimension_util.hpp" +#include "openvino/op/roi_pooling.hpp" + +namespace ov { +namespace op { +namespace pooling { +namespace validate { +template +void rois_input_shape(const TROIPooling* op, const TShape rois_shape) { + if (rois_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + rois_shape.size() == 2, + "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", + rois_shape); + + NODE_VALIDATION_CHECK(op, + rois_shape[1].compatible(5), + "The second dimension of ROIs input should contain batch id and box coordinates. ", + "This dimension is expected to be equal to 5. Got: ", + rois_shape[1]); + } +} + +template +void output_roi_attr(const TROIPooling* op) { + const auto& out_roi = op->get_output_roi(); + + NODE_VALIDATION_CHECK(op, + out_roi.size() == 2, + "The dimension of pooled size is expected to be equal to 2. 
Got: ", + out_roi.size()); + + NODE_VALIDATION_CHECK(op, + std::none_of(out_roi.cbegin(), out_roi.cend(), cmp::Less(1)), + "Pooled size attributes pooled_h and pooled_w should should be positive integers. Got: ", + out_roi[0], + " and: ", + out_roi[1], + "respectively"); +} + +template +void scale_attr(const TROIPooling* op) { + const auto scale = op->get_spatial_scale(); + NODE_VALIDATION_CHECK(op, + std::isnormal(scale) && !std::signbit(scale), + "The spatial scale attribute should be a positive floating point number. Got: ", + scale); +} + +template +void method_attr(const TROIPooling* op) { + const auto& method = op->get_method(); + NODE_VALIDATION_CHECK(op, + method == "max" || method == "bilinear", + "Pooling method attribute should be either \'max\' or \'bilinear\'. Got: ", + method); +} +} // namespace validate +} // namespace pooling + +namespace v0 { +template +std::vector shape_infer(const ROIPooling* op, const std::vector& input_shapes) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 2); + using namespace ov::util; + + const auto& feat_shape = input_shapes[0]; + const auto& rois_shape = input_shapes[1]; + const auto& feat_rank = feat_shape.rank(); + + NODE_VALIDATION_CHECK(op, + feat_rank.compatible(4), + "Expected a 4D tensor for the feature maps input. Got: ", + feat_shape); + + pooling::validate::rois_input_shape(op, rois_shape); + pooling::validate::output_roi_attr(op); + pooling::validate::scale_attr(op); + pooling::validate::method_attr(op); + + TShape out_shape; + out_shape.reserve(4); + + out_shape.emplace_back(rois_shape.rank().is_static() ? rois_shape[0] : dim::inf_bound); + out_shape.emplace_back(feat_rank.is_static() ? feat_shape[1] : dim::inf_bound); + std::copy(op->get_output_roi().cbegin(), op->get_output_roi().cend(), std::back_inserter(out_shape)); + + return {out_shape}; +} + +template +void shape_infer(const ROIPooling* op, const std::vector& input_shapes, std::vector& output_shapes) { + output_shapes = shape_infer(op, input_shapes); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 21296939b0198f..d1ecbd0716b27e 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -216,6 +216,39 @@ void Read>::operator()( Read{}(is, std::get<2>(tuple)); } +void Read::operator()(std::istream& is, AnyMap& map) const { + std::string key, value; + char c; + + is >> c; + OPENVINO_ASSERT(c == '{', "Failed to parse ov::AnyMap. Starting symbols is not '{', it's ", c); + + while (c != '}') { + std::getline(is, key, ':'); + size_t enclosed_container_level = 0; + + while (is.good()) { + is >> c; + if (c == ',') { // delimiter between map's pairs + if (enclosed_container_level == 0) // we should interrupt after delimiter + break; + } + if (c == '{' || c == '[') // case of enclosed maps / arrays + ++enclosed_container_level; + if (c == '}' || c == ']') { + if (enclosed_container_level == 0) + break; // end of map + --enclosed_container_level; + } + + value += c; // accumulate current value + } + map.emplace(std::move(key), std::move(value)); + } + + OPENVINO_ASSERT(c == '}', "Failed to parse ov::AnyMap. 
Ending symbols is not '}', it's ", c); +} + void Read>::operator()(std::istream& is, std::tuple& tuple) const { Read{}(is, std::get<0>(tuple)); diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index 930d71b80cefee..1aea44f9fa7ee2 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -179,14 +179,26 @@ ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { } struct TensorVectorCmp { + // Comparing Tensor vectors as numbers composed with pointers as digits. + // Indexed loop used to preserve order of comparison. bool operator()(const ov::TensorVector& lhs, const ov::TensorVector& rhs) const { - auto rhs_it = rhs.begin(); - return std::any_of(lhs.begin(), lhs.end(), [&rhs_it](const ov::Tensor& lhs) { - bool is_less = - (lhs && *rhs_it) ? lhs.data() < rhs_it->data() : static_cast(lhs) < static_cast(*rhs_it); - ++rhs_it; - return is_less; - }); + const auto lhs_size = lhs.size(); + const auto rhs_size = rhs.size(); + + if (lhs_size < rhs_size) + return true; + if (lhs_size > rhs_size) + return false; + + for (size_t i = 0; i < lhs_size; ++i) { + if (lhs[i].data() < rhs[i].data()) + return true; + if (lhs[i].data() > rhs[i].data()) + return false; + } + + // if all equals + return false; } }; @@ -281,17 +293,14 @@ bool ov::interval_bound_evaluator(const Node* node, auto low_1 = ov::evaluate_lower_bound(node->get_input_source_output(1)); auto up_0 = ov::evaluate_upper_bound(node->get_input_source_output(0)); auto up_1 = ov::evaluate_upper_bound(node->get_input_source_output(1)); + if (!low_0 || !low_1 || !up_0 || !up_1) + return false; std::set input_variants = {{low_0, low_1}, {low_0, up_1}, {up_0, low_1}, {up_0, up_1}}; - for (const auto& variant_of_input_vector : input_variants) - for (const auto& input_tensor : variant_of_input_vector) - if (!input_tensor) - return false; - if (input_variants.size() == 1) return node->evaluate(upper_output_values, *input_variants.begin()) && node->evaluate(lower_output_values, *input_variants.begin()); diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index 5a7a4c24a69f34..ed87ce606bf21a 100644 --- a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -100,10 +100,8 @@ const ov::Shape& ov::descriptor::Tensor::get_shape() const { size_t ov::descriptor::Tensor::size() const { const bool bitwidth_less_than_byte = m_element_type.bitwidth() < 8; - if (bitwidth_less_than_byte) { - return static_cast(ceil((1.0 * shape_size(get_shape()) * m_element_type.bitwidth()) / 8)); - } - return shape_size(get_shape()) * m_element_type.size(); + return bitwidth_less_than_byte ? 
(shape_size(get_shape()) * m_element_type.bitwidth() + 7) >> 3 + : (shape_size(get_shape()) * m_element_type.size()); } const std::unordered_set& ov::descriptor::Tensor::get_names() const { diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index df540d81f5554b..142514be45384b 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -955,7 +955,7 @@ bool ov::Model::has_rt_info(const std::vector& args) const { return false; if (i == args.size() - 1) break; - const ov::Any& rt_attr = get_rt_arg(info, args[i]); + const ov::Any rt_attr = get_rt_arg(info, args[i]); info = get_map_from_attr(rt_attr); } return true; diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 70b18e710f46e3..2a9baaeb90d144 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -385,9 +385,13 @@ std::ostream& ov::Node::write_description(std::ostream& out, uint32_t depth) con if (depth == 0) { out << get_friendly_name(); } else { - OPENVINO_SUPPRESS_DEPRECATED_START - out << "v" << get_type_info().version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; - OPENVINO_SUPPRESS_DEPRECATED_END + auto version = get_type_info().version_id; + if (version) + out << "v" << version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; + else + out << "v" + << " " + << "::" << get_type_info().name << " " << get_friendly_name() << " ("; string sep = ""; for (const auto& arg : input_values()) { out << sep << arg; diff --git a/src/core/src/op/assign.cpp b/src/core/src/op/assign.cpp index dbf7b1f68aeef0..5ab1bc6e4682da 100644 --- a/src/core/src/op/assign.cpp +++ b/src/core/src/op/assign.cpp @@ -96,6 +96,7 @@ bool op::v6::Assign::evaluate(const HostTensorVector& outputs, const auto& variable_values = variable_context.get_variable_values(); + OPENVINO_SUPPRESS_DEPRECATED_START // automatically allocate memory if not provided by user if (variable_values.find(m_variable) == variable_values.end()) { auto host_tensor = @@ -106,6 +107,7 @@ bool op::v6::Assign::evaluate(const HostTensorVector& outputs, const auto var_value = variable_values.find(m_variable)->second; var_value->set_reset(false); const auto& buffer = var_value->get_value(); + OPENVINO_SUPPRESS_DEPRECATED_END buffer->set_unary(inputs[0]); outputs[0]->set_unary(inputs[0]); diff --git a/src/core/src/op/interpolate.cpp b/src/core/src/op/interpolate.cpp index 6bfd961fc35de8..b34d39bc60ec63 100644 --- a/src/core/src/op/interpolate.cpp +++ b/src/core/src/op/interpolate.cpp @@ -186,6 +186,21 @@ void ov::op::v4::Interpolate::validate_and_infer_types() { input_shapes = {input_shape, target_spatial_shape, scales, axes}; } + const auto interpolation_mode_check = [](const op::util::InterpolateBase::InterpolateMode mode) { + constexpr std::array allowed_modes = { + op::util::InterpolateBase::InterpolateMode::NEAREST, + op::util::InterpolateBase::InterpolateMode::LINEAR, + op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX, + op::util::InterpolateBase::InterpolateMode::CUBIC}; + + return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes); + }; + + NODE_VALIDATION_CHECK(this, + interpolation_mode_check(m_attrs.mode), + "Unsupported interpolation mode used with version 4 of the Interpolate op: ", + as_string(m_attrs.mode)); + util::correct_pads_attr(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes); shape_infer(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes, output_shapes, {}); set_output_type(0, get_input_element_type(0), output_shapes[0]); diff --git 
a/src/core/src/op/read_value.cpp b/src/core/src/op/read_value.cpp index ce2e155a87f7d2..84940c0c4cf150 100644 --- a/src/core/src/op/read_value.cpp +++ b/src/core/src/op/read_value.cpp @@ -108,7 +108,9 @@ bool op::v6::ReadValue::evaluate(const HostTensorVector& outputs, // initial value (inputs[0]) is not supported, use zeros auto zero_const = make_shared(inputs[0]->get_element_type(), inputs[0]->get_shape(), 0); auto zero_tensor = make_shared(zero_const); + OPENVINO_SUPPRESS_DEPRECATED_START const auto& input_tensor = use_context ? var_value->second->get_value() : zero_tensor; + OPENVINO_SUPPRESS_DEPRECATED_END outputs[0]->set_unary(input_tensor); void* input = input_tensor->get_data_ptr(); diff --git a/src/core/src/op/roi_pooling.cpp b/src/core/src/op/roi_pooling.cpp index d0baa803933db5..00ee8dacf46447 100644 --- a/src/core/src/op/roi_pooling.cpp +++ b/src/core/src/op/roi_pooling.cpp @@ -2,18 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/roi_pooling.hpp" +#include "openvino/op/roi_pooling.hpp" #include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "roi_pooling_shape_inference.hpp" using namespace std; -using namespace ngraph; -op::ROIPooling::ROIPooling(const Output& input, - const Output& coords, - const ov::Shape& output_size, - const float spatial_scale, - const string& method) +namespace ov { +namespace op { +namespace v0 { +ROIPooling::ROIPooling(const Output& input, + const Output& coords, + const ov::Shape& output_size, + const float spatial_scale, + const string& method) : Op({input, coords}), m_output_size(output_size), m_spatial_scale(spatial_scale), @@ -21,10 +25,10 @@ op::ROIPooling::ROIPooling(const Output& input, constructor_validate_and_infer_types(); } -void op::ROIPooling::validate_and_infer_types() { +void ROIPooling::validate_and_infer_types() { OV_OP_SCOPE(v0_ROIPooling_validate_and_infer_types); - auto feat_maps_et = get_input_element_type(0); - auto coords_et = get_input_element_type(1); + const auto& feat_maps_et = get_input_element_type(0); + const auto& coords_et = get_input_element_type(1); NODE_VALIDATION_CHECK(this, feat_maps_et.is_real() && coords_et.is_real(), "The data type for input and ROIs is expected to be a floating point type. Got: ", @@ -34,72 +38,16 @@ void op::ROIPooling::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, feat_maps_et == coords_et, - "Type of feature maps (inputs) and rois is expected to be the same. Got: ", + "Type of feature maps (inputs) and ROIs is expected to be the same. Got: ", feat_maps_et, " and: ", coords_et); - NODE_VALIDATION_CHECK(this, - m_output_size.size() == 2, - "The dimension of pooled size is expected to be equal to 2. Got: ", - m_output_size.size()); - - NODE_VALIDATION_CHECK(this, - m_output_size[0] > 0 && m_output_size[1] > 0, - "Pooled size attributes pooled_h and pooled_w should should be " - "non-negative integers. Got: ", - m_output_size[0], - " and: ", - m_output_size[1], - "respectively"); - - NODE_VALIDATION_CHECK(this, - m_spatial_scale > 0, - "The spatial scale attribute should be a positive floating point number. Got: ", - m_spatial_scale); - - NODE_VALIDATION_CHECK(this, - m_method == "max" || m_method == "bilinear", - "Pooling method attribute should be either \'max\' or \'bilinear\'. 
Got: ", - m_method); + const auto output_shapes = shape_infer(this, get_node_input_partial_shapes(*this)); + set_output_type(0, feat_maps_et, output_shapes[0]); const auto& feat_maps_ps = get_input_partial_shape(0); - NODE_VALIDATION_CHECK(this, - feat_maps_ps.rank().compatible(4), - "Expected a 4D tensor for the feature maps input. Got: ", - feat_maps_ps); - const auto& coords_ps = get_input_partial_shape(1); - NODE_VALIDATION_CHECK(this, - coords_ps.rank().compatible(2), - "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", - coords_ps); - - if (coords_ps.rank().is_static()) { - const auto coords_second_dim = coords_ps[1]; - NODE_VALIDATION_CHECK(this, - coords_second_dim.compatible(5), - "The second dimension of ROIs input should contain batch id and box coordinates. ", - "This dimension is expected to be equal to 5. Got: ", - coords_second_dim); - } - - // output shape should be {NUM_ROIS, C, pooled_h, pooled_w} - auto output_shape = ov::PartialShape{{Dimension::dynamic(), - Dimension::dynamic(), - Dimension{static_cast(m_output_size[0])}, - Dimension{static_cast(m_output_size[1])}}}; - - if (coords_ps.rank().is_static()) { - output_shape[0] = coords_ps[0]; - } - - if (feat_maps_ps.rank().is_static()) { - output_shape[1] = feat_maps_ps[1]; - } - - set_output_size(1); - set_output_type(0, feat_maps_et, output_shape); // if channel dimension, C, not known // feature maps input is used by shape specialization pass @@ -114,13 +62,13 @@ void op::ROIPooling::validate_and_infer_types() { } } -shared_ptr op::ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { +shared_ptr ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_ROIPooling_clone_with_new_inputs); check_new_args_count(this, new_args); return make_shared(new_args.at(0), new_args.at(1), m_output_size, m_spatial_scale, m_method); } -bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { +bool ROIPooling::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_ROIPooling_visit_attributes); visitor.on_attribute("output_size", m_output_size); visitor.on_attribute("pooled_h", m_output_size[0]); @@ -129,3 +77,21 @@ bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("method", m_method); return true; } + +void ROIPooling::set_output_roi(Shape output_size) { + m_output_size = std::move(output_size); +} +const Shape& ROIPooling::get_output_roi() const { + return m_output_size; +} + +void ROIPooling::set_spatial_scale(float scale) { + m_spatial_scale = scale; +} + +void ROIPooling::set_method(std::string method_name) { + m_method = std::move(method_name); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/util/variable_value.cpp b/src/core/src/op/util/variable_value.cpp new file mode 100644 index 00000000000000..b126c7d9cb2d50 --- /dev/null +++ b/src/core/src/op/util/variable_value.cpp @@ -0,0 +1,143 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/util/variable_value.hpp" + +#include + +#include "ngraph/node.hpp" +#include "ngraph/runtime/host_tensor.hpp" +#include "openvino/core/deprecated.hpp" +#include "openvino/core/shape.hpp" +#include "openvino/runtime/allocator.hpp" +#include "openvino/runtime/itensor.hpp" +#include "openvino/runtime/tensor.hpp" +#include "shape_util.hpp" + +namespace { + +class TensorWrapper : public ngraph::runtime::HostTensor { +public: + TensorWrapper(const ov::Tensor& tensor) + : 
ngraph::runtime::HostTensor(tensor.get_element_type(), tensor.get_shape(), tensor.data()), + tensor(tensor) {} + + ov::Tensor tensor; +}; + +/** + * @brief Tensor what contains HostTensorPtr inside + */ +class HostTensorWrapper : public ov::ITensor { +public: + ngraph::HostTensorPtr tensor; + + HostTensorWrapper(const ngraph::HostTensorPtr& tensor) : tensor{tensor}, m_type(tensor->get_element_type()) { + const auto& p_shape = tensor->get_partial_shape(); + if (p_shape.is_static()) { + m_shape = p_shape.to_shape(); + } else { + OPENVINO_SUPPRESS_DEPRECATED_START + m_shape = ov::util::make_dynamic_shape(); + OPENVINO_SUPPRESS_DEPRECATED_END + } + update_strides(); + } + + const ov::element::Type& get_element_type() const override { + return m_type; + } + + void set_shape(ov::Shape shape) override { + tensor->set_shape(shape); + m_shape = shape; + update_strides(); + } + + const ov::Shape& get_shape() const override { + return m_shape; + } + + const ov::Strides& get_strides() const override { + OPENVINO_ASSERT(get_element_type().bitwidth() >= 8, + "Could not get strides for types with bitwidths less then 8 bit. Tensor type: ", + get_element_type()); + return m_strides; + } + + size_t get_size() const override { + return ov::shape_size(m_shape); + } + + size_t get_byte_size() const override { + return get_size() * m_type.size(); + } + + void* data(const ov::element::Type& element_type) const override { + return tensor->get_data_ptr(); + } + +private: + ov::element::Type m_type; + ov::Shape m_shape; + ov::Strides m_strides; + + void update_strides() { + if (m_type.bitwidth() >= 8) { + m_strides.clear(); + m_strides.resize(m_shape.size()); + auto size = m_strides.size(); + for (size_t i = 0; i < size; i++) { + size_t value(m_type.size()); + size_t dim(m_shape[size - 1 - i]); + if (i) { + value = m_strides[size - i] * dim; + } + m_strides[size - i - 1] = value; + } + } + } +}; +} // namespace + +ov::op::util::VariableValue::VariableValue() = default; + +OPENVINO_SUPPRESS_DEPRECATED_START +ov::op::util::VariableValue::VariableValue(ngraph::HostTensorPtr value) + : m_value(ov::Tensor{std::make_shared(value), {}}) {} + +ov::op::util::VariableValue::VariableValue(ngraph::HostTensorPtr value, bool reset) + : m_reset(reset), + m_value(ov::Tensor{std::make_shared(value), {}}) {} + +ngraph::HostTensorPtr ov::op::util::VariableValue::get_value() const { + if (auto wrapper = std::dynamic_pointer_cast(m_value._impl)) + return wrapper->tensor; + return std::make_shared(m_value); +} + +void ov::op::util::VariableValue::set_value(const ngraph::HostTensorPtr& value) { + m_value = ov::Tensor{std::make_shared(value), {}}; +} +OPENVINO_SUPPRESS_DEPRECATED_END + +void ov::op::util::VariableValue::set_reset(bool reset) { + m_reset = reset; +} + +bool ov::op::util::VariableValue::get_reset() const { + return m_reset; +} + +ov::op::util::VariableValue::VariableValue(const ov::Tensor& value) : m_value(value) {} + +ov::op::util::VariableValue::VariableValue(const ov::Tensor& value, bool reset) : m_reset(reset), m_value(value) {} + +const ov::Tensor& ov::op::util::VariableValue::get_state() const { + return m_value; +} + +void ov::op::util::VariableValue::set_state(const ov::Tensor& value) { + m_value = value; +} diff --git a/src/core/src/pass/low_latency.cpp b/src/core/src/pass/low_latency.cpp index 455adc024cd382..5364b08fd8a88e 100644 --- a/src/core/src/pass/low_latency.cpp +++ b/src/core/src/pass/low_latency.cpp @@ -18,7 +18,7 @@ #include NGRAPH_SUPPRESS_DEPRECATED_START 
-NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency"); using namespace std; diff --git a/src/core/src/pass/pass.cpp b/src/core/src/pass/pass.cpp index adde933e3c5df8..6b246959ac3f13 100644 --- a/src/core/src/pass/pass.cpp +++ b/src/core/src/pass/pass.cpp @@ -51,29 +51,12 @@ void ov::pass::PassBase::set_callback(const param_callback& callback) { m_pass_config->set_callback(callback); } -namespace { -class RunLocker { -public: - RunLocker(bool& flag) : m_flag(flag) { - OPENVINO_ASSERT(m_flag == false, - "Cycle detected. run_on_model() or run_on_function() method should be overridden."); - m_flag = true; - } - ~RunLocker() { - m_flag = false; - } - -private: - bool& m_flag; -}; -} // namespace - // The symbols are requiered to be in cpp file to workaround RTTI issue on Android LLVM ov::pass::ModelPass::~ModelPass() = default; OPENVINO_SUPPRESS_DEPRECATED_START -NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass"); ngraph::pass::NodePass::~NodePass() = default; diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 7103b75b71068e..dc7f634488bf5d 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -255,11 +255,6 @@ class XmlSerializer : public ov::AttributeVisitor { } } - if (ir_version < 11) { - // ops for serialized body function are provided in reversed order - std::reverse(output.begin(), output.end()); - } - return output; } @@ -836,7 +831,10 @@ void ngfunction_2_ir(pugi::xml_node& netXml, const bool exec_graph = is_exec_graph(model); auto sorted_ops = model.get_ordered_ops(); - if (version >= 11) { + + // get_ordered_ops() returns operations after a topological sort. The topological sort reverses order of Parameters + // and Results. So we need to put them into sorted_ops separately to ensure correct order of inputs and outputs. + { std::vector> result; result.reserve(sorted_ops.size()); for (const auto& param : model.get_parameters()) { @@ -1004,11 +1002,9 @@ void ngfunction_2_ir(pugi::xml_node& netXml, // WA for LSTMCellv0, peephole input shall not be serialized if (e.to_port == 6) { const auto& type_info = ordered_ops[e.to_layer]->get_type_info(); - OPENVINO_SUPPRESS_DEPRECATED_START - if (!strcmp(type_info.name, "LSTMCell") && type_info.version == 0) { + if (!strcmp(type_info.name, "LSTMCell")) { continue; } - OPENVINO_SUPPRESS_DEPRECATED_END } pugi::xml_node edge = edges.append_child("edge"); edge.append_attribute("from-layer").set_value(e.from_layer); diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 70ee298b547e5e..c89decb3f42121 100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -503,7 +503,9 @@ string pass::VisualizeTree::get_node_name(shared_ptr node) { if (node->get_friendly_name() != node->get_name()) { rc += "\\n" + (nvtmn ? string("name: ") : "") + node->get_name(); } - rc += "\\n" + (nvtmn ? string("type_name: ") : "") + std::string(node->get_type_name()); + const auto type_info = node->get_type_info(); + rc += "\\n" + (nvtmn ? 
string("type_name: ") : "") + std::string(type_info.version_id) + + "::" + std::string(type_info.name); static const bool nvttn = getenv_bool("OV_VISUALIZE_TREE_TENSORS_NAME"); if (nvttn) { diff --git a/src/core/src/runtime/itensor.cpp b/src/core/src/runtime/itensor.cpp index a880eca6a93d81..4800948c1a0f41 100644 --- a/src/core/src/runtime/itensor.cpp +++ b/src/core/src/runtime/itensor.cpp @@ -21,258 +21,4 @@ size_t ITensor::get_byte_size() const { return (get_size() * get_element_type().bitwidth() + 8 - 1) / 8; } -/** - * @brief View tensor to external memory - * The tensor doesn't own the external memory - */ -class ViewTensor : public ITensor { -public: - ViewTensor(const element::Type element_type, const Shape& shape, void* ptr) - : m_element_type{element_type}, - m_shape{shape}, - m_capacity{shape}, - m_ptr{ptr} { - OPENVINO_ASSERT(m_ptr != nullptr); - OPENVINO_ASSERT(m_element_type != element::undefined && m_element_type != element::dynamic); - update_strides(); - } - - void* data(const element::Type& element_type) const override { - if (element_type != element::undefined && element_type != element::dynamic) { - OPENVINO_ASSERT(element_type == get_element_type(), - "Tensor data with element type ", - get_element_type(), - ", is not representable as pointer to ", - element_type); - } - return m_ptr; - } - - const element::Type& get_element_type() const override { - return m_element_type; - } - - const Shape& get_shape() const override { - return m_shape; - } - - void set_shape(ov::Shape new_shape) override { - OPENVINO_ASSERT(shape_size(new_shape) <= ov::shape_size(m_capacity), "Could set new shape: ", new_shape); - m_shape = std::move(new_shape); - update_strides(); - } - - const Strides& get_strides() const override { - OPENVINO_ASSERT(m_element_type.bitwidth() >= 8, - "Could not get strides for types with bitwidths less then 8 bit. Tensor type: ", - m_element_type); - return m_strides; - } - -protected: - void update_strides() { - if (m_element_type.bitwidth() < 8) - return; - auto& shape = get_shape(); - m_strides.clear(); - if (!shape.empty()) { - m_strides.resize(shape.size()); - m_strides.back() = m_element_type.size(); - std::copy(shape.rbegin(), shape.rend() - 1, m_strides.rbegin() + 1); - std::partial_sum(m_strides.rbegin(), m_strides.rend(), m_strides.rbegin(), std::multiplies()); - } - } - - element::Type m_element_type; - Shape m_shape; - Shape m_capacity; - Strides m_strides; - void* m_ptr; -}; - -/** - * @brief View tensor on external memory with strides - */ -class StridedViewTensor : public ViewTensor { -public: - StridedViewTensor(const element::Type element_type, const Shape& shape, void* ptr, const Strides& strides) - : ViewTensor{element_type, shape, ptr} { - OPENVINO_ASSERT( - get_element_type().bitwidth() >= 8, - "Could not create strided access tensor for types with bitwidths less then 8 bit. 
Tensor type: ", - get_element_type()); - // Save default strides - auto shape_strides = m_strides; - // Change strides - m_strides = strides; - OPENVINO_ASSERT(m_shape.size() == m_strides.size()); - - for (size_t i = 0; i < m_strides.size(); ++i) { - OPENVINO_ASSERT(shape_strides[i] <= m_strides[i], - "shape stride: ", - shape_strides[i], - ", stride: ", - m_strides[i]); - OPENVINO_ASSERT((m_strides[i] % get_element_type().size()) == 0, - "shape stride: ", - shape_strides[i], - ", stride: ", - m_strides[i]); - if (i) { - OPENVINO_ASSERT(m_strides[i - 1] >= m_strides[i] * shape[i], - "Strides: ", - m_strides, - " are incompatible with shapes: ", - m_shape); - } - } - } - - void set_shape(ov::Shape new_shape) override { - OPENVINO_ASSERT(m_capacity.size() == new_shape.size(), - "Cannot set new shape: ", - new_shape, - " for tensor with strides! Shapes are not compatible."); - for (size_t i = 0; i < new_shape.size(); i++) { - OPENVINO_ASSERT(m_capacity[i] >= new_shape[i], - "Cannot set new shape: ", - new_shape, - " for tensor with strides! Dimension: ", - i, - " is not compatible."); - } - m_shape = std::move(new_shape); - } -}; - -/** - * @brief Creates view tensor on external memory - * - * @param element_type Tensor element type - * @param shape Tensor shape - * @param ptr pointer to external memoty - * @param byte_strides Tensor strides - * - * @return Shared pointer to tensor interface - */ -std::shared_ptr make_tensor(const element::Type element_type, - const Shape& shape, - void* ptr, - const Strides& byte_strides) { - return byte_strides.empty() ? std::make_shared(element_type, shape, ptr) - : std::make_shared(element_type, shape, ptr, byte_strides); -} - -/** - * @brief Tensor with allocated memory - * Tensor owns the memory - */ -class AllocatedTensor : public ViewTensor { -public: - AllocatedTensor(const element::Type element_type, const Shape& shape, const Allocator& allocator) - : ViewTensor{element_type, - shape, - [&] { - OPENVINO_ASSERT(allocator, "Allocator was not initialized"); - return const_cast(allocator).allocate(element_type.size() * shape_size(shape)); - }()}, - m_allocator{allocator} {} - - ~AllocatedTensor() { - m_allocator.deallocate(m_ptr, get_byte_size()); - } - - void set_shape(ov::Shape new_shape) override { - auto old_byte_size = get_byte_size(); - m_shape = std::move(new_shape); - if (get_byte_size() > old_byte_size) { - m_allocator.deallocate(m_ptr, old_byte_size); - m_ptr = m_allocator.allocate(get_byte_size()); - } - update_strides(); - } - -private: - Allocator m_allocator; -}; - -/** - * @brief Creates allocated tensor - * - * @param element_type Tensor element type - * @param shape Tensor shape - * @param allocator Tensor allocator - * - * @return Shared pointer to tensor interface - */ -std::shared_ptr make_tensor(const element::Type element_type, const Shape& shape, const Allocator& allocator) { - return std::make_shared(element_type, shape, allocator); -} - -/** - * @brief ROI tensor on other tensor - * ROI tensor holds the owner - */ -class RoiTensor : public ITensor { -public: - RoiTensor(const std::shared_ptr& owner, const Coordinate& begin, const Coordinate& end) : m_owner{owner} { - OPENVINO_ASSERT(owner->get_element_type().bitwidth() >= 8, - "ROI Tensor for types with bitwidths less then 8 bit is not implemented. 
Tensor type: ", - owner->get_element_type()); - auto owner_shape = owner->get_shape(); - OPENVINO_ASSERT(owner_shape.size() == begin.size()); - OPENVINO_ASSERT(begin.size() == end.size()); - m_shape.resize(begin.size()); - for (size_t i = 0; i < begin.size(); ++i) { - OPENVINO_ASSERT(begin[i] <= owner_shape[i]); - OPENVINO_ASSERT(end[i] <= owner_shape[i]); - m_shape[i] = end[i] - begin[i]; - OPENVINO_ASSERT(m_shape[i] <= owner_shape[i]); - } - auto& strides = get_strides(); - m_offset = std::inner_product(begin.begin(), begin.end(), strides.begin(), static_cast(0)); - } - - const element::Type& get_element_type() const override { - return m_owner->get_element_type(); - } - - const Strides& get_strides() const override { - return m_owner->get_strides(); - } - - const Shape& get_shape() const override { - return m_shape; - } - - void set_shape(ov::Shape new_shape) override { - OPENVINO_THROW("Shapes cannot be changed for ROI Tensor"); - } - - void* data(const element::Type& element_type) const override { - auto owner_data = m_owner->data(element_type); - return static_cast(owner_data) + m_offset; - } - -private: - std::shared_ptr m_owner; - size_t m_offset; - Shape m_shape; -}; - -/** - * @brief Creates ROI tensor - * - * @param other Tensor what owns the memory - * @param begin Begin coordinates - * @param end End coordinates - * - * @return Shared pointer to tensor interface - */ -std::shared_ptr make_tensor(const std::shared_ptr& other, - const Coordinate& begin, - const Coordinate& end) { - return std::make_shared(other, begin, end); -} - } // namespace ov diff --git a/src/core/src/type.cpp b/src/core/src/type.cpp index 3def4c0adde5f2..be48b68f1feb40 100644 --- a/src/core/src/type.cpp +++ b/src/core/src/type.cpp @@ -18,12 +18,9 @@ size_t DiscreteTypeInfo::hash() const { if (hash_value != 0) return hash_value; size_t name_hash = name ? std::hash()(std::string(name)) : 0; - OPENVINO_SUPPRESS_DEPRECATED_START - size_t version_hash = std::hash()(version); - OPENVINO_SUPPRESS_DEPRECATED_END size_t version_id_hash = version_id ? std::hash()(std::string(version_id)) : 0; - return ov::util::hash_combine(std::vector{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector{name_hash, version_id_hash}); } size_t DiscreteTypeInfo::hash() { @@ -40,9 +37,7 @@ std::string DiscreteTypeInfo::get_version() const { if (version_id) { return std::string(version_id); } - OPENVINO_SUPPRESS_DEPRECATED_START - return std::to_string(version); - OPENVINO_SUPPRESS_DEPRECATED_END + return nullptr; } DiscreteTypeInfo::operator std::string() const { @@ -51,10 +46,7 @@ DiscreteTypeInfo::operator std::string() const { std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { std::string version_id = info.version_id ? 
info.version_id : "(empty)"; - OPENVINO_SUPPRESS_DEPRECATED_START - s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", old_version: " << info.version - << ", parent: "; - OPENVINO_SUPPRESS_DEPRECATED_END + s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", parent: "; if (!info.parent) s << info.parent; else @@ -66,10 +58,7 @@ std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { // parent is commented to fix type relaxed operations bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { - OPENVINO_SUPPRESS_DEPRECATED_START - if (version < b.version) - return true; - if (version == b.version && name != nullptr && b.name != nullptr) { + if (name != nullptr && b.name != nullptr) { int cmp_status = strcmp(name, b.name); if (cmp_status < 0) return true; @@ -81,15 +70,20 @@ bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { } } - OPENVINO_SUPPRESS_DEPRECATED_END return false; } bool DiscreteTypeInfo::operator==(const DiscreteTypeInfo& b) const { if (hash_value != 0 && b.hash_value != 0) return hash() == b.hash(); - OPENVINO_SUPPRESS_DEPRECATED_START - return version == b.version && strcmp(name, b.name) == 0; - OPENVINO_SUPPRESS_DEPRECATED_END + if (name != nullptr && b.name != nullptr) { + if (strcmp(name, b.name) == 0) { + std::string v_id(version_id == nullptr ? "" : version_id); + std::string bv_id(b.version_id == nullptr ? "" : b.version_id); + if (v_id == bv_id) + return true; + } + } + return false; } bool DiscreteTypeInfo::operator<=(const DiscreteTypeInfo& b) const { return *this == b || *this < b; diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index e1dd3e60b7c1d1..6cd2bc51af1259 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -2,11 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # 'argument': conversion from 'size_t' to 'int', possible loss of data - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4244) -endif() set(TARGET_NAME ov_core_unit_tests) diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp index 4a5d46953f71ca..f66ae9720f0511 100644 --- a/src/core/tests/any.cpp +++ b/src/core/tests/any.cpp @@ -161,6 +161,187 @@ TEST_F(AnyTests, AnyAsMapOfAnys) { ASSERT_EQ(refMap["testParamString"].as(), testString); } +TEST_F(AnyTests, AnyAsMapOfMapOfAnys) { + std::map refMap1; + refMap1["testParamInt"] = 4; + refMap1["testParamString"] = "test"; + + std::map refMap2; + refMap2["testParamInt"] = 5; + refMap2["testParamString"] = "test2"; + + std::map refMap; + refMap["refMap1"] = refMap1; + refMap["refMap2"] = refMap2; + + Any p = refMap; + bool isMap = p.is>(); + ASSERT_TRUE(isMap); + auto testMap = p.as>(); + + ASSERT_NE(testMap.find("refMap1"), testMap.end()); + auto testMap1 = testMap.at("refMap1").as>(); + ASSERT_NE(testMap1.find("testParamInt"), testMap1.end()); + ASSERT_NE(testMap1.find("testParamString"), testMap1.end()); + + int testInt1 = testMap1["testParamInt"].as(); + std::string testString1 = testMap1["testParamString"].as(); + + ASSERT_EQ(refMap1["testParamInt"].as(), testInt1); + ASSERT_EQ(refMap1["testParamString"].as(), testString1); + + ASSERT_NE(testMap.find("refMap2"), testMap.end()); + auto testMap2 = testMap.at("refMap2").as>(); + ASSERT_NE(testMap2.find("testParamInt"), testMap2.end()); + ASSERT_NE(testMap2.find("testParamString"), testMap2.end()); + + int testInt2 = testMap2["testParamInt"].as(); + std::string testString2 = 
testMap2["testParamString"].as(); + + ASSERT_EQ(refMap2["testParamInt"].as(), testInt2); + ASSERT_EQ(refMap2["testParamString"].as(), testString2); +} + +TEST_F(AnyTests, AnyAsMapOfMapOfAnysFromString) { + const std::string string_props = "{map1:{prop1:1,prop2:2.0},map2:{prop1:value}}"; + ov::Any any(string_props); + + ov::AnyMap map; + ASSERT_TRUE(any.is()); + ASSERT_FALSE(any.is()); + ASSERT_NO_THROW(map = any.as()); + ASSERT_EQ(string_props, ov::Any(map).as()); + + // check map1 + using MapStrDouble = std::map; + MapStrDouble map1; + ASSERT_TRUE(map["map1"].is()); + ASSERT_FALSE(map["map1"].is()); + ASSERT_FALSE(map["map1"].is()); + ASSERT_NO_THROW(map1 = map["map1"].as()); + ASSERT_EQ(2, map1.size()); + + // check map1:prop1 + ASSERT_EQ(1.0, map1["prop1"]); + // check map1:prop2 + ASSERT_EQ(2.0, map1["prop2"]); + + // check map2 + ov::AnyMap map2; + ASSERT_TRUE(map["map2"].is()); + ASSERT_FALSE(map["map2"].is()); + ASSERT_NO_THROW(map2 = map["map2"].as()); + ASSERT_EQ(1, map2.size()); + + // check map1:prop1 + ASSERT_TRUE(map2["prop1"].is()); + ASSERT_FALSE(map2["prop1"].is()); + ASSERT_EQ("value", map2["prop1"].as()); +} + +TEST_F(AnyTests, AnyAsMapOfMapOfMapOfAnysFromString) { + const std::string string_props = "{map1:{subprop_map:{prop:value}},prop1:1,prop2:2.0}"; + ov::Any any(string_props); + + ov::AnyMap map; + ASSERT_TRUE(any.is()); + ASSERT_FALSE(any.is()); + ASSERT_NO_THROW(map = any.as()); + ASSERT_EQ(3, map.size()); + ASSERT_EQ(string_props, ov::Any(map).as()); + + // check prop1 + ASSERT_TRUE(map["prop1"].is()); + ASSERT_FALSE(map["prop1"].is()); + ASSERT_EQ("1", map["prop1"].as()); + ASSERT_EQ(1, map["prop1"].as()); + + // check prop2 + ASSERT_TRUE(map["prop2"].is()); + ASSERT_FALSE(map["prop2"].is()); + ASSERT_FALSE(map["prop2"].is()); + ASSERT_EQ("2.0", map["prop2"].as()); + ASSERT_EQ(2, map["prop2"].as()); + ASSERT_EQ(2.0, map["prop2"].as()); + + // check map1 + ov::AnyMap map1; + ASSERT_TRUE(map["map1"].is()); + ASSERT_FALSE(map["map1"].is()); + ASSERT_NO_THROW(map1 = map["map1"].as()); + + // check subprop + ov::AnyMap subprop_map; + ASSERT_TRUE(map1["subprop_map"].is()); + ASSERT_FALSE(map1["subprop_map"].is()); + ASSERT_NO_THROW(subprop_map = map1["subprop_map"].as()); + + // check prop + ASSERT_TRUE(subprop_map["prop"].is()); + ASSERT_FALSE(subprop_map["prop"].is()); + ASSERT_EQ("value", subprop_map["prop"].as()); +} + +TEST_F(AnyTests, AnyDoesNotShareValues) { + // simple types + { + Any a = 1; + Any b = a; + a = 2; + ASSERT_EQ(1, b.as()); + ASSERT_EQ(2, a.as()); + b = 3; + ASSERT_EQ(2, a.as()); + ASSERT_EQ(3, b.as()); + } + + // AnyMap's + { + AnyMap map{ + {"1", ov::Any(1)}, + {"2", ov::Any(2)}, + }; + + Any a = map; + + // check initial state + ASSERT_EQ(1, a.as()["1"].as()); + ASSERT_EQ(2, a.as()["2"].as()); + + map["1"] = 3; // change map + ASSERT_EQ(1, a.as()["1"].as()); // Any is not changed + + a.as()["2"] = 4; // change Any + ASSERT_EQ(2, map["2"].as()); // map is not changed + + // erase from Any's map + AnyMap from_any_map = a.as(); + from_any_map.erase(from_any_map.begin()); + ASSERT_EQ(2, map.size()); + + // erase from map + map.erase(map.find("2")); + ASSERT_NE(from_any_map.end(), from_any_map.find("2")); + ASSERT_EQ(4, a.as()["2"].as()); + } +} + +TEST_F(AnyTests, DISABLED_AnyMapSharesValues) { + AnyMap map{ + {"1", 1}, + {"2", 2}, + }; + + AnyMap copy_map = map; + + // check initial state + ASSERT_EQ(1, copy_map["1"].as()); + ASSERT_EQ(2, copy_map["2"].as()); + + map["1"].as() = 110; // change map + EXPECT_EQ(1, copy_map["1"].as()); // TODO: why 
value is changed here? +} + TEST_F(AnyTests, AnyNotEmpty) { Any p = 4; ASSERT_FALSE(p.empty()); @@ -401,7 +582,31 @@ TEST_F(AnyTests, PrintToMapOfAnys) { { Any p = refMap; ASSERT_NO_THROW(p.print(stream)); - ASSERT_EQ(stream.str(), std::string{"testParamInt 4 testParamString test"}); + ASSERT_EQ(stream.str(), std::string{"{testParamInt:4,testParamString:test}"}); + } +} + +TEST_F(AnyTests, PrintToMapOfMapsOfAnys) { + std::map refMap1; + refMap1["testParamInt"] = 4; + refMap1["testParamString"] = "test"; + + std::map refMap2; + refMap2["testParamInt"] = 5; + refMap2["testParamString"] = "test2"; + + std::map refMap; + refMap["refMap1"] = refMap1; + refMap["refMap2"] = refMap2; + + std::stringstream stream; + { + Any p = refMap; + ASSERT_NO_THROW(p.print(stream)); + ASSERT_EQ( + stream.str(), + std::string{ + "{refMap1:{testParamInt:4,testParamString:test},refMap2:{testParamInt:5,testParamString:test2}}"}); } } diff --git a/src/core/tests/bound_evaluate.cpp b/src/core/tests/bound_evaluate.cpp index cb855ddfa76e16..664f8ebcb810f7 100644 --- a/src/core/tests/bound_evaluate.cpp +++ b/src/core/tests/bound_evaluate.cpp @@ -51,3 +51,31 @@ TEST_F(EvaluateBoundTest, no_exception_when_node_has_output_with_dynamic_element EXPECT_NO_THROW(evaluate_both_bounds(fn_op)); } + +using BoundEvaluatorTest = ::testing::Test; +TEST(BoundEvaluatorTest, no_exception_on_single_bound) { + constexpr auto et = element::i32; + const auto s = Shape{1, 1}; + const auto a = std::make_shared(et, PartialShape{s}); + const auto b = Constant::create(et, s, {1}); + const auto sub = std::make_shared(a, b); + + int32_t a_l[1] = {1}; + a->get_output_tensor(0).set_lower_value(Tensor{et, s, a_l}); + + int32_t o_[1] = {INT32_MIN}; // initial value of output tensor is not needed, it's set to check whether changed + TensorVector output{{et, s, o_}}; + // evaluations won't be performed due to missing upper bound tensor of parameter a + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], INT32_MIN); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], INT32_MIN); + + int32_t a_u[1] = {11}; + a->get_output_tensor(0).set_upper_value(Tensor{et, s, a_u}); + // now both bounds of sub node can be calculated + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], 0); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], 10); +} diff --git a/src/core/tests/graph_rewrite.cpp b/src/core/tests/graph_rewrite.cpp index c7fb67d243c4c3..d85146b1ffd2d0 100644 --- a/src/core/tests/graph_rewrite.cpp +++ b/src/core/tests/graph_rewrite.cpp @@ -55,9 +55,9 @@ class Anchor : public ngraph::pass::GraphRewrite { Anchor() : GraphRewrite() {} }; -NGRAPH_RTTI_DEFINITION(TestPass, "TestPass", 0); -NGRAPH_RTTI_DEFINITION(Anchor, "Anchor", 0); -NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass", 0); +NGRAPH_RTTI_DEFINITION(TestPass, "TestPass"); +NGRAPH_RTTI_DEFINITION(Anchor, "Anchor"); +NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass"); std::shared_ptr get_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -165,7 +165,7 @@ class PrivateDivide : public ngraph::opset3::Divide { using ngraph::opset3::Divide::Divide; }; -NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", 0, ngraph::opset3::Divide); +NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", ngraph::opset3::Divide); std::shared_ptr get_derived_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -434,7 +434,7 @@ class CheckConsumers : public 
ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers", 0); +NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers"); TEST(GraphRewriteTest, nodes_use_count) { auto f = get_function(); diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index cfc5cb7250b41a..289ae1188cea53 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -72,7 +72,7 @@ INSTANTIATE_TEST_SUITE_P(opset, class MyOpOld : public ov::op::Op { public: - static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld", static_cast(0)}; + static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld"}; const ov::DiscreteTypeInfo& get_type_info() const override { return type_info; } @@ -121,7 +121,7 @@ TEST(opset, custom_opset) { opset.insert(); opset.insert(); EXPECT_EQ(opset.get_types_info().size(), 3); - EXPECT_TRUE(opset.contains_type("MyOpNewFromOld")); - EXPECT_TRUE(opset.contains_type("MyOpNew")); - EXPECT_TRUE(opset.contains_type("MyOpIncorrect")); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNewFromOld"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNew"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpIncorrect"))); } diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp index a6e465e7e44a29..2abfe1b6e53052 100644 --- a/src/core/tests/pass/serialization/deterministicity.cpp +++ b/src/core/tests/pass/serialization/deterministicity.cpp @@ -8,27 +8,29 @@ #include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" +#include "openvino/opsets/opset1.hpp" #include "openvino/pass/serialize.hpp" #include "openvino/util/file_util.hpp" #include "read_ir.hpp" #include "util/test_common.hpp" -class SerializationDeterministicityTest : public ov::test::TestsCommon { +class DeterministicityCommon { protected: - std::string m_out_xml_path_1; - std::string m_out_bin_path_1; - std::string m_out_xml_path_2; - std::string m_out_bin_path_2; + std::string m_out_xml_path_1{}; + std::string m_out_bin_path_1{}; + std::string m_out_xml_path_2{}; + std::string m_out_bin_path_2{}; + std::string filePrefix{}; - void SetUp() override { - std::string filePrefix = CommonTestUtils::generateTestFilePrefix(); + void SetupFileNames() { + filePrefix = CommonTestUtils::generateTestFilePrefix(); m_out_xml_path_1 = filePrefix + "1" + ".xml"; m_out_bin_path_1 = filePrefix + "1" + ".bin"; m_out_xml_path_2 = filePrefix + "2" + ".xml"; m_out_bin_path_2 = filePrefix + "2" + ".bin"; } - void TearDown() override { + void RemoveFiles() { std::remove(m_out_xml_path_1.c_str()); std::remove(m_out_xml_path_2.c_str()); std::remove(m_out_bin_path_1.c_str()); @@ -55,6 +57,17 @@ class SerializationDeterministicityTest : public ov::test::TestsCommon { } }; +class SerializationDeterministicityTest : public ov::test::TestsCommon, public DeterministicityCommon { +protected: + void SetUp() override { + SetupFileNames(); + } + + void TearDown() override { + RemoveFiles(); + } +}; + #ifdef ENABLE_OV_ONNX_FRONTEND TEST_F(SerializationDeterministicityTest, BasicModel) { @@ -130,3 +143,158 @@ TEST_F(SerializationDeterministicityTest, ModelWithConstants) { ASSERT_TRUE(files_equal(xml_1, xml_2)); ASSERT_TRUE(files_equal(bin_1, bin_2)); } + +class SerializationDeterministicityInputOutputTest : public testing::TestWithParam, + public DeterministicityCommon { +protected: + std::string input0Name{"input0"}; + std::string input1Name{"input1"}; + std::string output0Name{"output0"}; + std::string output1Name{"output1"}; 
+ + std::string xmlFileName{}; + + void SetupFileNames() { + DeterministicityCommon::SetupFileNames(); + xmlFileName = filePrefix + "_TestModel.xml"; + } + + void RemoveFiles() { + DeterministicityCommon::RemoveFiles(); + std::remove(xmlFileName.c_str()); + } + + void SetUp() override { + SetupFileNames(); + } + + void TearDown() override { + RemoveFiles(); + } +}; + +TEST_P(SerializationDeterministicityInputOutputTest, FromOvModel) { + auto irVersion = GetParam(); + + std::shared_ptr modelRef; + { + auto parameter0 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter0->set_friendly_name("input0"); + auto result0 = std::make_shared(parameter0); + result0->set_friendly_name("output0"); + auto parameter1 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter1->set_friendly_name("input1"); + auto result1 = std::make_shared(parameter1); + result1->set_friendly_name("output1"); + modelRef = + std::make_shared(ov::NodeVector{result0, result1}, ov::ParameterVector{parameter0, parameter1}); + } + + auto& expected1 = modelRef; + ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1, irVersion).run_on_model(modelRef); + auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1); + ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2); + + EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected1->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected1->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected1->output(1).get_node()->get_friendly_name()); + EXPECT_EQ(input0Name, expected2->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected2->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected2->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected2->output(1).get_node()->get_friendly_name()); + + std::ifstream xml_1(m_out_xml_path_1, std::ios::in | std::ios::binary); + std::ifstream xml_2(m_out_xml_path_2, std::ios::in | std::ios::binary); + EXPECT_TRUE(files_equal(xml_1, xml_2)); +} + +TEST_P(SerializationDeterministicityInputOutputTest, FromIrModel) { + auto irVersion = GetParam(); + + std::string irModel_1stPart = R"V0G0N( + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + + +)V0G0N"; + std::string strVersion = irVersion == ov::pass::Serialize::Version::IR_V11 ? 
"11" : "10"; + std::string irModel = irModel_1stPart + strVersion + irModel_2ndPart; + + { + std::ofstream xmlFile; + xmlFile.open(xmlFileName); + xmlFile << irModel; + xmlFile.close(); + } + + auto expected1 = ov::test::readModel(xmlFileName, ""); + ov::pass::Serialize(m_out_xml_path_1, "", irVersion).run_on_model(expected1); + auto expected2 = ov::test::readModel(m_out_xml_path_1, ""); + ov::pass::Serialize(m_out_xml_path_2, "", irVersion).run_on_model(expected2); + + EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected1->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected1->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected1->output(1).get_node()->get_friendly_name()); + EXPECT_EQ(input0Name, expected2->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected2->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected2->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected2->output(1).get_node()->get_friendly_name()); + + std::ifstream xml_1(m_out_xml_path_1, std::ios::in | std::ios::binary); + std::ifstream xml_2(m_out_xml_path_2, std::ios::in | std::ios::binary); + EXPECT_TRUE(files_equal(xml_2, xml_1)); +} + +INSTANTIATE_TEST_CASE_P(DeterministicityInputOutput, + SerializationDeterministicityInputOutputTest, + ::testing::Values(ov::pass::Serialize::Version::IR_V10, ov::pass::Serialize::Version::IR_V11)); \ No newline at end of file diff --git a/src/core/tests/pass_config.cpp b/src/core/tests/pass_config.cpp index 3c9395aedc2bd2..e2c0d15e943eae 100644 --- a/src/core/tests/pass_config.cpp +++ b/src/core/tests/pass_config.cpp @@ -32,7 +32,7 @@ class RenameReLU : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU", 0); +NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU"); class RenameSigmoid : public ngraph::pass::MatcherPass { public: @@ -50,7 +50,7 @@ class RenameSigmoid : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid", 0); +NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid"); class TestFunctionPass : public ngraph::pass::FunctionPass { public: @@ -67,7 +67,7 @@ class TestFunctionPass : public ngraph::pass::FunctionPass { } }; -NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass", 0); +NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass"); class TestGraphRewritePass : public ngraph::pass::GraphRewrite { public: @@ -78,7 +78,7 @@ class TestGraphRewritePass : public ngraph::pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass", 0); +NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass"); std::tuple, std::shared_ptr, std::shared_ptr> get_test_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -289,7 +289,7 @@ class TestNestedMatcher : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher", 0); +NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher"); class TestNestedGraphRewrite : public pass::GraphRewrite { public: @@ -299,7 +299,7 @@ class TestNestedGraphRewrite : public pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite", 0); +NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite"); TEST(PassConfig, EnableDisablePasses10) { std::shared_ptr f; diff --git a/src/core/tests/rtti.cpp b/src/core/tests/rtti.cpp index 
bb0362f52a366f..e5a1619e75d3ce 100644 --- a/src/core/tests/rtti.cpp +++ b/src/core/tests/rtti.cpp @@ -42,7 +42,7 @@ class OpTypeVersionParent : public OpType { class OpTypeVersionParentOld : public OpType { public: - OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType, 1); + OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType); OpTypeVersionParentOld() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { @@ -56,7 +56,6 @@ TEST(rtti, op_with_type) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpType::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpType"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "extension"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -67,7 +66,6 @@ TEST(rtti, op_with_type_version) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersion::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersion"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -78,7 +76,6 @@ TEST(rtti, op_with_type_version_parent) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersionParent::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParent"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); @@ -90,7 +87,6 @@ TEST(rtti, op_with_type_version_parent_old) { ASSERT_EQ(type_info, OpTypeVersionParentOld::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParentOld"), 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version1"), 0); - ASSERT_EQ(type_info.version, 1); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); } diff --git a/src/core/tests/type_info.cpp b/src/core/tests/type_info.cpp index 1beac34b3055c2..98c1f9c8e146ed 100644 --- a/src/core/tests/type_info.cpp +++ b/src/core/tests/type_info.cpp @@ -10,11 +10,11 @@ OPENVINO_SUPPRESS_DEPRECATED_START TEST(type_info, compare_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0)); - ov::DiscreteTypeInfo type2("type2", static_cast(0)); - ov::DiscreteTypeInfo type3("type1", 1ul); - ov::DiscreteTypeInfo type4("type3", static_cast(0), &type1); - ov::DiscreteTypeInfo type5("type3", static_cast(0), &type2); + ov::DiscreteTypeInfo type1("type1"); + ov::DiscreteTypeInfo type2("type2"); + ov::DiscreteTypeInfo type3("type1"); + ov::DiscreteTypeInfo type4("type3", &type1); + ov::DiscreteTypeInfo type5("type3", &type2); ASSERT_TRUE(type1 != type2); ASSERT_TRUE(type1 == type1); ASSERT_TRUE(type1 < type2); @@ -46,40 +46,37 @@ TEST(type_info, compare_new_type) { } TEST(type_info, compare_new_with_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0), "version1"); - ov::DiscreteTypeInfo type1_o("type1", static_cast(0)); + ov::DiscreteTypeInfo type1("type1", "version1"); + ov::DiscreteTypeInfo type1_o("type1", "version1"); ASSERT_TRUE(type1 == type1_o); } TEST(type_info, check_hash_value) { - const auto& hash_val = [](const char* name, const char* version_id, uint64_t version) -> size_t { + const auto& hash_val = [](const char* name, const char* version_id) -> size_t { size_t name_hash = name ? 
std::hash()(std::string(name)) : 0; - size_t version_hash = std::hash()(version); size_t version_id_hash = version_id ? std::hash()(std::string(version_id)) : 0; // don't use parent for hash calculation, it is not a part of type (yet) - return ov::util::hash_combine(std::vector{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector{name_hash, version_id_hash}); }; - ov::DiscreteTypeInfo type("type1", 0, "version1"); - ov::DiscreteTypeInfo type_old("type1", 1); - ov::DiscreteTypeInfo type_with_version("type1", 1, "version1"); - ov::DiscreteTypeInfo type_empty_name("", static_cast(0)); - ov::DiscreteTypeInfo type_empty_ver("type", static_cast(0), ""); - EXPECT_EQ(hash_val(type.name, type.version_id, type.version), type.hash()); - EXPECT_EQ(hash_val(type_old.name, type_old.version_id, type_old.version), type_old.hash()); - EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id, type_with_version.version), - type_with_version.hash()); - EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id, type_empty_name.version), - type_empty_name.hash()); - EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id, type_empty_ver.version), type_empty_ver.hash()); + ov::DiscreteTypeInfo type("type1", "version1"); + ov::DiscreteTypeInfo type_old("type1"); + ov::DiscreteTypeInfo type_with_version("type1", "version1"); + ov::DiscreteTypeInfo type_empty_name(""); + ov::DiscreteTypeInfo type_empty_ver("type", ""); + EXPECT_EQ(hash_val(type.name, type.version_id), type.hash()); + EXPECT_EQ(hash_val(type_old.name, type_old.version_id), type_old.hash()); + EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id), type_with_version.hash()); + EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id), type_empty_name.hash()); + EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id), type_empty_ver.hash()); } TEST(type_info, find_in_map) { std::vector vector_names; - ov::DiscreteTypeInfo a("Mod", 1ul, "opset1"); - ov::DiscreteTypeInfo b("Prelu", static_cast(0), "opset1"); - ov::DiscreteTypeInfo c("Vector", static_cast(0)); - ov::DiscreteTypeInfo d("Mod", 1ul, "opset3"); - ov::DiscreteTypeInfo f("Mod", 2ul); + ov::DiscreteTypeInfo a("Mod", "opset1"); + ov::DiscreteTypeInfo b("Prelu", "opset1"); + ov::DiscreteTypeInfo c("Vector"); + ov::DiscreteTypeInfo d("Mod", "opset3"); + ov::DiscreteTypeInfo f("Mod", "opset2"); std::map test_map; test_map[a] = 1; @@ -94,20 +91,20 @@ TEST(type_info, find_in_map) { test_map[type] = 2; std::string name = type.name; vector_names.emplace_back(name); - ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str()); test_map[t] = 3; test_map[t2] = 4; std::string name1 = "a" + name; vector_names.emplace_back(name1); - ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str()); test_map[t3] = 5; test_map[t4] = 6; std::string name2 = name + "z"; vector_names.emplace_back(name2); - ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t6(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo 
t6(vector_names.rbegin()->c_str()); test_map[t5] = 7; test_map[t6] = 8; } diff --git a/src/core/tests/type_prop/broadcast.cpp b/src/core/tests/type_prop/broadcast.cpp index e4a82aefb31285..e587512b4e7a70 100644 --- a/src/core/tests/type_prop/broadcast.cpp +++ b/src/core/tests/type_prop/broadcast.cpp @@ -613,7 +613,6 @@ TEST(type_prop, broadcast_v3_bidirectional_mode_string) { const auto broadcast_v3 = make_shared(arg, shape, "BIDIRECTIONAL"); ASSERT_EQ(broadcast_v3->get_broadcast_spec(), op::BroadcastType::BIDIRECTIONAL); - ASSERT_EQ(broadcast_v3->get_version(), 3); } TEST(type_prop, broadcast_v3_shape_unexpected_axes_mapping_input) { diff --git a/src/core/tests/type_prop/interpolate.cpp b/src/core/tests/type_prop/interpolate.cpp index b220ecd8a8f754..7f0f5ff3a5bb68 100644 --- a/src/core/tests/type_prop/interpolate.cpp +++ b/src/core/tests/type_prop/interpolate.cpp @@ -214,6 +214,28 @@ TEST(type_prop, interpolate_v4_interval_logic) { ASSERT_TRUE(interp->get_output_partial_shape(0).same_scheme(out_shape)); } +TEST(type_prop, interpolate_v4_incorrect_mode) { + const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); + const auto target_shape = std::make_shared(element::i32, Shape{2}); + const auto scales = op::Constant::create(element::f32, Shape{2}, {6.f, 12.f}); + const auto axes = op::Constant::create(element::i64, Shape{2}, {2, 3}); + + ov::op::util::InterpolateBase::InterpolateAttrs attrs; + attrs.shape_calculation_mode = ov::op::util::InterpolateBase::ShapeCalcMode::SCALES; + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BICUBIC_PILLOW; + attrs.pads_begin = {0, 0, 0, 0}; + attrs.pads_end = {0, 0, 0, 0}; + + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); + + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BILINEAR_PILLOW; + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); +} + TEST(type_prop, interpolate_v11_scales) { const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); const auto scales = op::Constant::create(element::f32, Shape{2}, {0.2f, 0.2f}); diff --git a/src/core/tests/type_prop/matmul.cpp b/src/core/tests/type_prop/matmul.cpp index f7b6116a259adf..327e8574c53301 100644 --- a/src/core/tests/type_prop/matmul.cpp +++ b/src/core/tests/type_prop/matmul.cpp @@ -528,7 +528,7 @@ TEST(type_prop, matmul_propagate_labels_on_interval_dims) { } TEST(type_prop, matmul_propagate_label_on_b_input_after_reshape) { - constexpr size_t my_label = 2; + constexpr ov::label_t my_label = 2; auto marked_dim = Dimension(2, 3); ov::DimensionTracker::set_label(marked_dim, my_label); diff --git a/src/core/tests/type_prop/roi_pooling.cpp b/src/core/tests/type_prop/roi_pooling.cpp index 0fa337a37ea5be..e86b52eef52641 100644 --- a/src/core/tests/type_prop/roi_pooling.cpp +++ b/src/core/tests/type_prop/roi_pooling.cpp @@ -2,109 +2,171 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "common_test_utils/test_assertions.hpp" #include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" +#include "openvino/opsets/opset11.hpp" +#include "type_prop.hpp" using namespace std; -using namespace ngraph; - -TEST(type_prop, roi_pooling_basic_shape_inference) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois 
= make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_EQ(op->get_method(), "max"); - ASSERT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); +using namespace ov; +using namespace ov::opset11; +using namespace testing; + +class TypePropROIPoolingV0 : public TypePropOpTest { +protected: + float spatial_scale = 0.625f; + Shape pooling_roi_2x2{2, 2}; +}; + +TEST_F(TypePropROIPoolingV0, default_ctor) { + const auto feat_maps = make_shared(element::f32, PartialShape{{0, 3}, {1, 3}, {1, 6}, {1, 6}}); + const auto rois = make_shared(element::f32, PartialShape{{2, 4}, {1, 5}}); + + const auto op = make_op(); + op->set_arguments(OutputVector{feat_maps, rois}); + op->set_spatial_scale(spatial_scale); + op->set_method("max"); + op->set_output_roi({3, 4}); + op->validate_and_infer_types(); + + EXPECT_FLOAT_EQ(op->get_spatial_scale(), spatial_scale); + EXPECT_EQ(op->get_output_roi(), Shape({3, 4})); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_input_size(), 2); + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(static_cast(op.get())->get_output_size(), 1); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{{2, 4}, {1, 3}, 3, 4})); } -TEST(type_prop, roi_pooling_dynamic_channels_dim) { - const auto feat_maps = make_shared(element::f32, PartialShape{1, Dimension(), 6, 6}); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, basic_shape_inference) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, 0.625f); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); } -TEST(type_prop, roi_pooling_dynamic_num_rois_dim) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape{Dimension(), 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_channels_dim) { + auto feat_shape = PartialShape{1, -1, 6, 6}; + auto rois_shape = PartialShape{4, 5}; + set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f32, feat_shape); + const auto rois = make_shared(element::f32, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_feat_maps) { - const auto feat_maps = make_shared(element::f32, PartialShape::dynamic()); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_num_rois_dim) { + auto feat_shape = PartialShape{1, 3, 6, 6}; + auto rois_shape = PartialShape{-1, 5}; + 
set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f64, feat_shape); + const auto rois = make_shared(element::f64, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"); + + EXPECT_EQ(op->get_element_type(), element::f64); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_rois) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape::dynamic()); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_maps) { + const auto feat_maps = make_shared(element::f16, PartialShape::dynamic()); + const auto rois = make_shared(element::f16, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_input_rank) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps must be of rank 4 - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_rois) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape::dynamic()); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_pooling_shape) { - Shape pool_shape{2, 2, 2}; - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // pool_shape must be of rank 2 {pooled_h, pooled_w} - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, pool_shape, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_input_rank) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("Expected a 4D tensor for the feature maps input")); } -TEST(type_prop, roi_pooling_incompatible_rois_second_dim) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 4}); - // the second dim of rois must be 5. 
[batch_id, x_1, y_1, x_2, y_2] - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_pooling_shape) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{2, 2, 2}, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The dimension of pooled size is expected to be equal to 2")); +} + +TEST_F(TypePropROIPoolingV0, incompatible_rois_second_dim) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 4}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " + "dimension is expected to be equal to 5")); } -TEST(type_prop, roi_pooling_incompatible_feature_maps_element_type) { - const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps element type must be floating point type - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_feature_maps_element_type) { + const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_incompatible_rois_element_type) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // rois element type must be equal to feat_maps element type (floating point type) - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "bilinear"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_rois_element_type) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::i16, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_invalid_pooling_method) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling method is invalid: not max nor bilinear - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "invalid"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooling_method) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "invalid"), + NodeValidationFailure, + HasSubstr("Pooling method attribute should be either \'max\' or \'bilinear\'")); } -TEST(type_prop, roi_pooling_invalid_spatial_scale) { - const auto feat_maps = 
make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling spatial scale attribute must be a positive floating point number - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, -0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_spatial_scale) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, -1.0f), + NodeValidationFailure, + HasSubstr("The spatial scale attribute should be a positive floating point number")); } -TEST(type_prop, roi_pooling_invalid_pooled_size) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling pooled_h and pooled_w must be non-negative integers - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{1, 0}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooled_size) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{1, 0}, spatial_scale), + NodeValidationFailure, + HasSubstr("Pooled size attributes pooled_h and pooled_w should should be positive integers")); } diff --git a/src/core/tests/visitors/op/interpolate.cpp b/src/core/tests/visitors/op/interpolate.cpp index 23e0178e5a8b2e..10fc680896b32d 100644 --- a/src/core/tests/visitors/op/interpolate.cpp +++ b/src/core/tests/visitors/op/interpolate.cpp @@ -81,7 +81,7 @@ TEST(attributes, interpolate_op4) { TEST(attributes, interpolate_op11) { NodeBuilder::get_ops().register_factory(); const auto img = make_shared(element::f32, Shape{1, 3, 32, 32}); - const auto scales = op::v0::Constant::create(element::f32, {1}, {1.0}); + const auto scales = op::v0::Constant::create(element::f32, {4}, {1.0, 1.0, 2.0, 2.0}); op::v11::Interpolate::InterpolateAttrs attrs; attrs.mode = op::v11::Interpolate::InterpolateMode::BILINEAR_PILLOW; diff --git a/src/core/tests/visitors/op/roi_pooling.cpp b/src/core/tests/visitors/op/roi_pooling.cpp index 8438a797728eb1..a5b49fe9cca3d2 100644 --- a/src/core/tests/visitors/op/roi_pooling.cpp +++ b/src/core/tests/visitors/op/roi_pooling.cpp @@ -25,7 +25,7 @@ TEST(attributes, roi_pooling_op) { NodeBuilder builder(op, {data, coords}); const auto g_op = ov::as_type_ptr(builder.create()); - EXPECT_EQ(g_op->get_output_size(), op->get_output_size()); + EXPECT_EQ(g_op->get_output_roi(), op->get_output_roi()); EXPECT_EQ(g_op->get_spatial_scale(), op->get_spatial_scale()); EXPECT_EQ(g_op->get_method(), op->get_method()); } diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 72eaeb9b07acf4..0a4ece0aae5e63 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -771,7 +771,7 @@ std::shared_ptr XmlDeserializer::create_node( const std::string& type_name = translate_type_name(params.type); std::shared_ptr ngraphNode; - ov::DiscreteTypeInfo type(type_name.c_str(), 0, params.version.c_str()); + ov::DiscreteTypeInfo type(type_name.c_str(), params.version.c_str()); auto extensionIt = m_extensions.find(type); if (extensionIt != m_extensions.end()) { @@ -885,7 +885,7 @@ std::shared_ptr XmlDeserializer::create_node( item.print(ss); IE_THROW() 
<< "rt_info attribute: " << attribute_name << " has no \"version\" field: " << ss.str(); } - const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), 0, attribute_version.c_str()); + const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), attribute_version.c_str()); auto attr = attrs_factory.create_by_type_info(type_info); if (!attr.empty()) { if (attr.is()) { diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index 2edeaae86ec594..db84dae67ddb36 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -2,12 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4018) - ie_add_compiler_flags(/wd4244) -endif() - ov_add_frontend(NAME onnx LINKABLE_FRONTEND PROTOBUF_LITE diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index e940c172e87eec..5d46da8805efe1 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -328,7 +328,7 @@ std::shared_ptr Graph::create_function() { const auto& onnx_outputs = m_model->get_graph().output(); for (std::size_t i{0}; i < function->get_output_size(); ++i) { const auto& result_node = function->get_output_op(i); - const std::string onnx_output_name = onnx_outputs.Get(i).name(); + const std::string onnx_output_name = onnx_outputs.Get(static_cast(i)).name(); result_node->set_friendly_name(onnx_output_name + "/sink_port_0"); const auto& previous_operation = result_node->get_input_node_shared_ptr(0); previous_operation->set_friendly_name(onnx_output_name); @@ -386,7 +386,7 @@ OutputVector Graph::make_ng_nodes(const Node& onnx_node) { const size_t outputs_size = std::accumulate(std::begin(ng_subgraph_outputs), std::end(ng_subgraph_outputs), - 0, + static_cast(0), [](const size_t lhs, const Output& rhs) { return lhs + rhs.get_node()->get_output_size(); }); @@ -420,10 +420,11 @@ void Graph::set_friendly_names(const Node& onnx_node, const OutputVector& ng_sub const auto common_node = detail::common_node_for_all_outputs(ng_subgraph_outputs); - for (size_t i = 0; i < ng_subgraph_outputs.size(); ++i) { + const auto ng_subgraph_output_size = static_cast(ng_subgraph_outputs.size()); + for (int i = 0; i < ng_subgraph_output_size; ++i) { // Trailing optional outputs may not be specified in the ONNX model. // Other optional outputs should have name set to an empty string. 
- if (i >= onnx_node.get_outputs_size()) { + if (i >= static_cast(onnx_node.get_outputs_size())) { break; } diff --git a/src/frontends/onnx/frontend/src/editor.cpp b/src/frontends/onnx/frontend/src/editor.cpp index 56ba992a5c882b..af968ac16572d5 100644 --- a/src/frontends/onnx/frontend/src/editor.cpp +++ b/src/frontends/onnx/frontend/src/editor.cpp @@ -213,9 +213,10 @@ void graph_topological_sort(GraphProto* graph) { std::multimap output_name_to_node; GraphProto result; - for (int i = 0; i < graph->node().size(); ++i) { + const auto nodes_number = static_cast(graph->node().size()); + for (int i = 0; i < nodes_number; ++i) { for (const auto& output_name : graph->node(i).output()) { - output_name_to_node.emplace(output_name, graph->mutable_node(static_cast(i))); + output_name_to_node.emplace(output_name, graph->mutable_node(i)); } } auto get_node_by_out_name = [&output_name_to_node](const std::string& out_name) -> const NodeProto* { diff --git a/src/frontends/onnx/frontend/src/op/hardmax.cpp b/src/frontends/onnx/frontend/src/op/hardmax.cpp index eb9b421cd71ed6..5e726e2458b55d 100644 --- a/src/frontends/onnx/frontend/src/op/hardmax.cpp +++ b/src/frontends/onnx/frontend/src/op/hardmax.cpp @@ -4,6 +4,8 @@ #include "op/hardmax.hpp" +#include + #include "exceptions.hpp" #include "ngraph/builder/reshape.hpp" #include "ngraph/op/one_hot.hpp" @@ -37,11 +39,11 @@ OutputVector hardmax(const Node& node) { const auto indices_axis = 1; const auto topk = - std::make_shared(coerced_tensor, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - indices_axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(coerced_tensor, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + indices_axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); @@ -71,11 +73,11 @@ OutputVector hardmax(const Node& node) { row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size); const auto topk = - std::make_shared(input, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(input, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); diff --git a/src/frontends/onnx/frontend/src/op/roi_align.cpp b/src/frontends/onnx/frontend/src/op/roi_align.cpp index 9b6959e126c38a..6773806fe993a7 100644 --- a/src/frontends/onnx/frontend/src/op/roi_align.cpp +++ b/src/frontends/onnx/frontend/src/op/roi_align.cpp @@ -68,9 +68,9 @@ OutputVector roi_align(const Node& node) { return {std::make_shared(data, rois, num_rois, - pooled_h, - pooled_w, - sampling_ratio, + static_cast(pooled_h), + static_cast(pooled_w), + static_cast(sampling_ratio), spatial_scale, pooling_mode, aligned_mode)}; diff --git a/src/frontends/onnx/frontend/src/op/topk.cpp b/src/frontends/onnx/frontend/src/op/topk.cpp index df884eaa54c0a9..dc33b103613df2 100644 --- a/src/frontends/onnx/frontend/src/op/topk.cpp +++ b/src/frontends/onnx/frontend/src/op/topk.cpp @@ -6,6 +6,7 @@ #include #include 
+#include #include "default_opset.hpp" #include "ngraph/node.hpp" @@ -37,13 +38,12 @@ OutputVector topk(const Node& node) { const auto k_node = node.get_attribute_as_constant("k"); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k_node, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k_node, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -55,13 +55,12 @@ OutputVector topk(const Node& node) { auto k = get_k(node); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -79,13 +78,13 @@ OutputVector topk(const Node& node) { const auto sorted = node.get_attribute_value("sorted", 1); // Map attribute values to nGraph enums - const auto sort_type = sorted ? default_opset::TopK::SortType::SORT_VALUES : default_opset::TopK::SortType::NONE; + const auto sort_type = sorted ? ov::opset11::TopK::SortType::SORT_VALUES : ov::opset11::TopK::SortType::NONE; const auto compute_max = static_cast(largest); - const auto mode = compute_max ? default_opset::TopK::Mode::MAX : default_opset::TopK::Mode::MIN; + const auto mode = compute_max ? ov::opset11::TopK::Mode::MAX : ov::opset11::TopK::Mode::MIN; std::shared_ptr top_k = - std::make_shared(data, k, axis, mode, sort_type, element::i64); + std::make_shared(data, k, axis, mode, sort_type, element::i64); return {top_k->output(0), top_k->output(1)}; } diff --git a/src/frontends/onnx/frontend/src/place.cpp b/src/frontends/onnx/frontend/src/place.cpp index 6b67cf7efcfd03..3430bf4e6b56cb 100644 --- a/src/frontends/onnx/frontend/src/place.cpp +++ b/src/frontends/onnx/frontend/src/place.cpp @@ -317,8 +317,8 @@ ov::frontend::Place::Ptr PlaceOp::get_input_port(const std::string& input_name) std::vector PlaceOp::get_consuming_ports() const { std::vector consuming_ports; - const auto out_ports_number = m_editor->get_output_ports(m_node).size(); - for (size_t out_idx = 0; out_idx < out_ports_number; ++out_idx) { + const auto out_ports_number = static_cast(m_editor->get_output_ports(m_node).size()); + for (int out_idx = 0; out_idx < out_ports_number; ++out_idx) { auto consuming_ops_out = get_output_port(out_idx)->get_consuming_ports(); consuming_ports.insert(consuming_ports.end(), consuming_ops_out.begin(), consuming_ops_out.end()); } diff --git a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp index d5a3fdb827061f..9807367273e46f 100644 --- a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp +++ b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp @@ -4,6 +4,8 @@ #include "utils/arg_min_max_factory.hpp" +#include + #include "default_opset.hpp" #include "ngraph/opsets/opset1.hpp" #include "ngraph/validation_util.hpp" @@ -18,14 +20,14 @@ ArgMinMaxFactory::ArgMinMaxFactory(const Node& node) m_select_last_index{node.get_attribute_value("select_last_index", 0)} {} std::shared_ptr ArgMinMaxFactory::make_arg_max() const { - return 
make_topk_subgraph(default_opset::TopK::Mode::MAX); + return make_topk_subgraph(ov::opset11::TopK::Mode::MAX); } std::shared_ptr ArgMinMaxFactory::make_arg_min() const { - return make_topk_subgraph(default_opset::TopK::Mode::MIN); + return make_topk_subgraph(ov::opset11::TopK::Mode::MIN); } -std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset::TopK::Mode mode) const { +std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(ov::opset11::TopK::Mode mode) const { const auto k_node = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); if (m_select_last_index == 1) { @@ -59,11 +61,11 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset const auto axis_node = default_opset::Constant::create(ngraph::element::i64, Shape{1}, {normalized_axis}); const auto reverse = std::make_shared(m_input_node, axis_node, opset1::Reverse::Mode::INDEX); - const auto topk = std::make_shared(reverse, - k_node, - normalized_axis, - mode, - default_opset::TopK::SortType::NONE); + const auto topk = std::make_shared(reverse, + k_node, + normalized_axis, + mode, + ov::opset11::TopK::SortType::NONE); const auto data_shape = std::make_shared(m_input_node); const auto dims_on_axis = std::make_shared( @@ -88,7 +90,7 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset } const auto topk = - std::make_shared(m_input_node, k_node, m_axis, mode, default_opset::TopK::SortType::NONE); + std::make_shared(m_input_node, k_node, m_axis, mode, ov::opset11::TopK::SortType::NONE); const auto result = std::make_shared(topk->output(1), element::i64); diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index ac41687b9b47f2..b6fed5f851ae43 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -6,13 +6,6 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE OFF) ov_try_use_gold_linker() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4244) - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4305) - ie_add_compiler_flags(/wd4756) -endif() - message(STATUS "ONNX frontend test enabled") add_compile_definitions( diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 91ce448c02c06f..b49861e1806fa7 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -469,7 +469,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_greater_or_equal_inside_if) { // expected value == x * y std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); std::vector expected; std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float { return i * j; @@ -489,21 +489,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{3, 5}, - {0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, - 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + {0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 
0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input(Shape{3}, {1, 4, 3}); test_case.add_expected_output(Shape{}, {1}); test_case.run(); @@ -797,27 +797,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0, 0.0, 1.0}); - test_case.add_expected_output({0.09003058, 0.24472848, 0.66524094}); + test_case.add_expected_output({0.09003058f, 0.24472848f, 0.66524094f}); test_case.run(); } namespace { // common input for all Softmax 3D test cases (Shape = {3,4,5}) // clang-format off const std::vector SOFTMAX_INPUT = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, - 0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657, - -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112, - -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, - - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, - -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106, - 1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606, - -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, - - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, - 0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542, - -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638, - -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, + 0.21004745f, 1.38337255f, 1.19030397f, 2.0940445f, -0.03551657f, + -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, -0.56797112f, + -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, + + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, + -0.13259761f, -1.14313018f, 0.2673723f, -0.87996154f, 1.29053106f, + 1.55f, 0.8396538f, 1.20729817f, 0.23727845f, -0.89113606f, + -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, + + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, + 0.75425957f, -2.43721014f, -1.24478184f, 2.65316853f, 1.19509542f, + -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, -0.4589638f, + -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; } // namespace // clang-format on @@ -831,20 +831,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, - 0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694, - 0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007, - 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, - - 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, - 0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216, - 0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907, - 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, - - 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, - 0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982, - 0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087, - 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + {0.09683057f, 0.00369363f, 0.01394559f, 0.00329012f, 0.00234823f, + 0.00757665f, 0.02449322f, 0.02019284f, 0.04985249f, 0.00592694f, + 0.00279593f, 0.04505148f, 0.00641108f, 0.00458466f, 0.00348007f, + 0.00172928f, 0.00330577f, 0.01093237f, 0.01554086f, 0.10351497f, + + 0.01648154f, 0.00583583f, 0.00182802f, 0.00515374f, 0.02491679f, + 0.00537859f, 0.00195794f, 
0.00802367f, 0.00254737f, 0.0223216f, + 0.02893419f, 0.0142204f, 0.02053893f, 0.00778581f, 0.00251907f, + 0.00111174f, 0.00800149f, 0.0030324f, 0.06658917f, 0.0179084f, + + 0.00181811f, 0.01407243f, 0.01072611f, 0.0069699f, 0.01158077f, + 0.01305647f, 0.00053677f, 0.0017687f, 0.08719896f, 0.02028982f, + 0.00236265f, 0.01027717f, 0.0060709f, 0.01216173f, 0.00388087f, + 0.00385541f, 0.00758048f, 0.00909469f, 0.04775123f, 0.03836337f}); // clang-format on test_case.run(6); @@ -860,20 +860,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, - 0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945, - 0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884, - 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, - - 0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, - 0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492, - 0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028, - 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, - - 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, - 0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446, - 0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255, - 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + {0.22757064f, 0.00868076f, 0.03277484f, 0.00773243f, 0.0055188f, + 0.0178066f, 0.05756383f, 0.04745709f, 0.11716303f, 0.01392945f, + 0.00657097f, 0.10587974f, 0.01506727f, 0.01077484f, 0.00817884f, + 0.00406413f, 0.00776921f, 0.0256932f, 0.03652405f, 0.24328028f, + + 0.06217413f, 0.02201481f, 0.00689594f, 0.01944171f, 0.09399488f, + 0.02028993f, 0.00738604f, 0.03026811f, 0.00960958f, 0.08420492f, + 0.10914991f, 0.05364435f, 0.07748005f, 0.02937079f, 0.0095028f, + 0.00419387f, 0.03018442f, 0.01143929f, 0.2511977f, 0.06755678f, + + 0.00587593f, 0.04548053f, 0.0346656f, 0.02252594f, 0.03742775f, + 0.04219705f, 0.00173478f, 0.00571623f, 0.2818174f, 0.06557446f, + 0.00763582f, 0.03321466f, 0.01962049f, 0.03930537f, 0.01254255f, + 0.01246025f, 0.02449929f, 0.02939305f, 0.15432668f, 0.12398617f}); // clang-format on test_case.run(4); @@ -890,20 +890,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, - 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, - 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, - 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, - - 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, - 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, - 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, - 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, - - 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, - 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, - 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, - 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + {0.88890495f, 0.04825497f, 0.27088348f, 0.04490523f, 0.02037154f, + 0.06955369f, 0.31998834f, 0.39223197f, 0.68041159f, 0.05141776f, + 0.02566661f, 0.5885689f, 0.12453075f, 0.06257374f, 0.03019055f, + 0.01587475f, 0.0431878f, 0.21235381f, 0.21210944f, 0.89802015f, + + 0.31752626f, 0.19442629f, 0.0546935f, 0.06279221f, 0.36823282f, + 0.10362164f, 0.06523066f, 0.24006419f, 0.03103672f, 0.32987983f, + 0.55743381f, 0.473766f, 0.61451431f, 
0.09486084f, 0.03722801f, + 0.02141829f, 0.26657706f, 0.090728f, 0.81131024f, 0.26465935f, + + 0.08619648f, 0.43343993f, 0.3877785f, 0.04523505f, 0.15625437f, + 0.61900597f, 0.01653285f, 0.06394322f, 0.56592636f, 0.27376196f, + 0.11201305f, 0.31654337f, 0.21947994f, 0.07893034f, 0.05236297f, + 0.18278451f, 0.23348385f, 0.32879834f, 0.30990825f, 0.5176207f}); // clang-format on test_case.run(4); @@ -920,20 +920,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, + 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -950,20 +950,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 
0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, + 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -1786,7 +1786,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_down_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {1.6307871, 3.0046299, 4.3784733, 7.1261587, 8.5, 9.873844, 12.621532, 13.995373, 15.369216}); + {1.6307871f, 3.0046299f, 4.3784733f, 7.1261587f, 8.5f, 9.873844f, 12.621532f, 13.995373f, 15.369216f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1848,18 +1848,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1891,18 +1891,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel_dynamic_siz test_case.add_input(std::vector{1, 1, 9, 10}); // sizes 
test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -2177,7 +2177,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_batch_nd_elementwise) { // Shape{2, 3, 4, 5} std::vector slope(shape_size(Shape{2, 3, 4, 5})); - std::iota(std::begin(slope), std::end(slope), 0); + std::iota(std::begin(slope), std::end(slope), 0.f); inputs.emplace_back(slope); // Shape{2, 3, 4, 5} @@ -2426,19 +2426,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softplus) { -FLT_MAX}}; const auto inf = std::numeric_limits::infinity(); - std::vector output{0.3132616579532623291, - 0.6931471824645996094, - 1.313261628150939941, - 10.0000457763671875, - 100.0, - 0.0, - 1000.0, - 0.0, - 0.6931471824645996094, - 0.6931471824645996094, - 0.6931471824645996094, + std::vector output{0.3132616579532623291f, + 0.6931471824645996094f, + 1.313261628150939941f, + 10.0000457763671875f, + 100.0f, + 0.0f, + 1000.0f, + 0.0f, + 0.6931471824645996094f, + 0.6931471824645996094f, + 0.6931471824645996094f, inf, - 0.0}; + 0.0f}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2507,7 +2507,7 @@ 
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmax_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({0, 3, 0}); test_case.run(); } @@ -2517,7 +2517,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmin_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({1, 1, 0, 2}); test_case.run(); } @@ -2528,7 +2528,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_select_last_index) { "onnx/argmax_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 0.5, 3, 4, 0.5, 1, 1.1, 0, 3, 0}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 0.5f, 3.f, 4.f, 0.5f, 1.f, 1.1f, 0.f, 3.f, 0.f}); test_case.add_expected_output(Shape{1, 3}, {0, 3, 1}); test_case.run(); } @@ -2539,7 +2539,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_select_last_index) { "onnx/argmin_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 2, 3, 4, 2, 1, 1.1, 3, 3, 8}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.1f, 3.f, 3.f, 8.f}); test_case.add_expected_output(Shape{4}, {2, 0, 1, 1}); test_case.run(); } @@ -2736,10 +2736,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_one_hot_with_axis) { auto function = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/one_hot_axis.onnx")); - Inputs inputs{{1.0, 9.0, 2.0, 4.0}, {1.0, 3.0}}; - std::vector expected_output{{1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, - 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}}; + Inputs inputs{{1.0f, 9.0f, 2.0f, 4.0f}, {1.0f, 3.0f}}; + std::vector expected_output{{1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2851,7 +2852,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p1) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2870,7 +2871,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p2) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2889,7 +2890,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), 
std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2909,7 +2910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default_dynamic) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data_shape, data); @@ -2928,7 +2929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -2953,7 +2954,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_instance_normalization_dynamic) { std::vector input_data{1.f, 2.f, 3.f}; test_case.add_input(Shape{1, 3, 1, 1}, input_data); test_case.add_expected_output(Shape{1, 3, 1, 1}, - {0.3341970741748809814, 0.3321160078048706055, 0.3407136797904968262}); + {0.3341970741748809814f, 0.3321160078048706055f, 0.3407136797904968262f}); test_case.run(); } @@ -3121,9 +3122,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_fmod_f32) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mod_sign_fmod_f32.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-4.3, 7.2, 5.0, 4.3, -7.2, 8.0}); - test_case.add_input({2.1, -3.4, 8.0, -2.1, 3.4, 5.0}); - test_case.add_expected_output(Shape{6}, {-0.10000038, 0.39999962, 5., 0.10000038, -0.39999962, 3.}); + test_case.add_input({-4.3f, 7.2f, 5.0f, 4.3f, -7.2f, 8.0f}); + test_case.add_input({2.1f, -3.4f, 8.0f, -2.1f, 3.4f, 5.0f}); + test_case.add_expected_output(Shape{6}, {-0.10000038f, 0.39999962f, 5.f, 0.10000038f, -0.39999962f, 3.f}); test_case.run(); } @@ -3280,22 +3281,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_gather_float_2D_neg_indices) { // clang-format off test_case.add_input(Shape{3, 3}, - { 0.0, 0.1, 0.2, - 1.0, 1.1, 1.2, - 2.0, 2.1, 2.2 }); + { 0.0f, 0.1f, 0.2f, + 1.0f, 1.1f, 1.2f, + 2.0f, 2.1f, 2.2f }); test_case.add_input(Shape{2, 2}, { -1, -2, -3, -2 }); test_case.add_expected_output(Shape{3, 2, 2}, { - 0.2, 0.1, - 0.0, 0.1, + 0.2f, 0.1f, + 0.0f, 0.1f, - 1.2, 1.1, - 1.0, 1.1, + 1.2f, 1.1f, + 1.0f, 1.1f, - 2.2, 2.1, - 2.0, 2.1 }); + 2.2f, 2.1f, + 2.0f, 2.1f }); // clang-format on test_case.run(); @@ -3633,10 +3634,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3650,10 +3651,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); 
+ {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3669,10 +3670,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3688,10 +3689,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); + {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3700,8 +3701,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_image_scaler) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/image_scaler.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0}); - test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0, 14.0, 16.0, 18.0, 21.0, 41.0, 61.0, 81.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f}); + test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0f, 14.0f, 16.0f, 18.0f, 21.0f, 41.0f, 61.0f, 81.0f}); test_case.run(); } @@ -3710,7 +3711,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_single) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_single.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}); + test_case.add_input(Shape{2, 3}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); test_case.add_expected_output(Shape{}, {6}); test_case.run(); } @@ -3720,7 +3721,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_end) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_graph_end.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(Shape{}, {4}); test_case.run(); } @@ -3731,8 +3732,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_middle) { "onnx/size_op_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); - test_case.add_expected_output(Shape{}, {4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); + test_case.add_expected_output(Shape{}, {4.0f}); test_case.run(); } @@ -3743,11 +3744,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_on_input_graph_middle) { "onnx/size_op_on_input_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 2, 4, 1, 3}, {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}); + test_case.add_input(Shape{1, 2, 4, 1, 3}, {0.f, 0.f, 0.f, 0.f, 0.f, 
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); test_case.add_expected_output(Shape{1, 2, 4, 1, 3}, - {24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., - 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24.}); + {24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, + 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f}); test_case.run(); } @@ -3818,36 +3819,39 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_out_half_pixel) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6, - 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2, - 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8, - 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4, - 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88., - 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6, - 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2, - 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8, - 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4, - 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 174.9, 176., - 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6, - 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2, - 212.3, 213.4, 214.5, 215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8, - 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6}); - - test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3}); + {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f, 15.4f, + 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f, 29.7f, 30.8f, + 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f, 44.f, 45.1f, 46.2f, + 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f, 58.3f, 59.4f, 60.5f, 61.6f, + 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f, 72.6f, 73.7f, 74.8f, 75.9f, 77.f, + 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f, 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, + 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f, 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, + 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f, 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, + 124.3f, 125.4f, 126.5f, 127.6f, 128.7f, 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, + 139.7f, 140.8f, 141.9f, 143.f, 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, + 155.1f, 156.2f, 157.3f, 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, + 170.5f, 171.6f, 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, + 185.9f, 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f, + 201.3f, 202.4f, 203.5f, 204.6f, 205.7f, 206.8f, 207.9f, 209.f, 210.1f, 
211.2f, 212.3f, 213.4f, 214.5f, 215.6f, + 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, 229.9f, 231.f, + 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {2.145, 2.42, 2.6950002, 2.9700003, 3.96, 4.235, 4.51, 4.7850003, 5.775, 6.05, - 6.325, 6.6000004, 7.59, 7.8650007, 8.14, 8.415001, 41.745003, 42.019997, 42.295, 42.57, - 43.56, 43.835, 44.11, 44.385002, 45.375, 45.65, 45.925003, 46.200005, 47.190002, 47.465004, - 47.74, 48.015, 162.77249, 163.0475, 163.32251, 163.5975, 164.42252, 164.69751, 164.9725, 165.2475, - 166.07251, 166.3475, 166.6225, 166.8975, 167.72249, 167.9975, 168.27249, 168.5475, 202.3725, 202.6475, - 202.9225, 203.19751, 204.02252, 204.2975, 204.57251, 204.8475, 205.6725, 205.94751, 206.2225, 206.4975, - 207.32251, 207.5975, 207.8725, 208.1475, 91.162506, 91.4375, 91.7125, 91.9875, 92.8125, 93.0875, - 93.3625, 93.6375, 94.4625, 94.7375, 95.0125, 95.28749, 96.1125, 96.3875, 96.6625, 96.9375, - 130.76251, 131.0375, 131.3125, 131.5875, 132.4125, 132.6875, 132.9625, 133.2375, 134.0625, 134.33751, - 134.6125, 134.88751, 135.7125, 135.9875, 136.26251, 136.53749}); + {2.145f, 2.42f, 2.6950002f, 2.9700003f, 3.96f, 4.235f, 4.51f, 4.7850003f, 5.775f, + 6.05f, 6.325f, 6.6000004f, 7.59f, 7.8650007f, 8.14f, 8.415001f, 41.745003f, 42.019997f, + 42.295f, 42.57f, 43.56f, 43.835f, 44.11f, 44.385002f, 45.375f, 45.65f, 45.925003f, + 46.200005f, 47.190002f, 47.465004f, 47.74f, 48.015f, 162.77249f, 163.0475f, 163.32251f, 163.5975f, + 164.42252f, 164.69751f, 164.9725f, 165.2475f, 166.07251f, 166.3475f, 166.6225f, 166.8975f, 167.72249f, + 167.9975f, 168.27249f, 168.5475f, 202.3725f, 202.6475f, 202.9225f, 203.19751f, 204.02252f, 204.2975f, + 204.57251f, 204.8475f, 205.6725f, 205.94751f, 206.2225f, 206.4975f, 207.32251f, 207.5975f, 207.8725f, + 208.1475f, 91.162506f, 91.4375f, 91.7125f, 91.9875f, 92.8125f, 93.0875f, 93.3625f, 93.6375f, + 94.4625f, 94.7375f, 95.0125f, 95.28749f, 96.1125f, 96.3875f, 96.6625f, 96.9375f, 130.76251f, + 131.0375f, 131.3125f, 131.5875f, 132.4125f, 132.6875f, 132.9625f, 133.2375f, 134.0625f, 134.33751f, + 134.6125f, 134.88751f, 135.7125f, 135.9875f, 136.26251f, 136.53749f}); test_case.run(); } @@ -3858,36 +3862,40 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_half_pixel) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6, - 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2, - 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8, - 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4, - 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88., - 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6, - 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2, - 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8, - 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4, - 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 
174.9, 176., - 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6, - 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2, - 212.3, 213.4, 214.5, 215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8, - 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6}); - - test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3}); + {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f, + 15.4f, 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f, + 29.7f, 30.8f, 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f, + 44.f, 45.1f, 46.2f, 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f, + 58.3f, 59.4f, 60.5f, 61.6f, 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f, + 72.6f, 73.7f, 74.8f, 75.9f, 77.f, 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f, + 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f, + 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f, + 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, 124.3f, 125.4f, 126.5f, 127.6f, 128.7f, + 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, 139.7f, 140.8f, 141.9f, 143.f, + 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, 155.1f, 156.2f, 157.3f, + 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, 170.5f, 171.6f, + 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, 185.9f, + 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f, + 201.3f, 202.4f, 203.5f, 204.6f, 205.7f, 206.8f, 207.9f, 209.f, 210.1f, 211.2f, 212.3f, 213.4f, 214.5f, + 215.6f, 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, + 229.9f, 231.f, 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 2.3375, 2.3375, - 2.3375, 2.3375, 4.1525, 4.1525, 4.1525, 4.1525, 40.7, 40.7, 40.7, 40.7, - 40.7, 40.7, 40.7, 40.7, 41.9375, 41.9375, 41.9375, 41.9375, 43.7525, 43.7525, - 43.7525, 43.7525, 159.72, 159.94, 160.16, 160.38, 159.90562, 160.12563, 160.34563, 160.56563, - 160.9575, 161.1775, 161.3975, 161.61751, 162.1125, 162.3325, 162.55249, 162.77249, 199.32, 199.54001, - 199.76001, 199.97998, 199.50562, 199.72563, 199.94562, 200.16562, 200.5575, 200.7775, 200.9975, 201.2175, - 201.7125, 201.93251, 202.1525, 202.37251, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5}); + {1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 2.3375f, + 2.3375f, 2.3375f, 2.3375f, 4.1525f, 4.1525f, 4.1525f, 4.1525f, 40.7f, 40.7f, + 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 41.9375f, 41.9375f, 41.9375f, + 41.9375f, 43.7525f, 43.7525f, 43.7525f, 43.7525f, 159.72f, 159.94f, 160.16f, 160.38f, + 159.90562f, 160.12563f, 160.34563f, 160.56563f, 160.9575f, 161.1775f, 161.3975f, 161.61751f, 162.1125f, + 162.3325f, 162.55249f, 162.77249f, 199.32f, 
199.54001f, 199.76001f, 199.97998f, 199.50562f, 199.72563f, + 199.94562f, 200.16562f, 200.5575f, 200.7775f, 200.9975f, 201.2175f, 201.7125f, 201.93251f, 202.1525f, + 202.37251f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, + 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f}); test_case.run_with_tolerance_as_fp(0.01f); } @@ -3898,9 +3906,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern) { auto test_case = test::TestCase(function, s_device); // scale == 3.0 // zero point == 10 - test_case.add_input({9.0, 10.0, 15.0, 20.0, 30.0}); - test_case.add_input({1}); - test_case.add_expected_output(Shape{5}, {9.0, 9.0, 15.0, 21.0, 30.0}); + test_case.add_input({9.0f, 10.0f, 15.0f, 20.0f, 30.0f}); + test_case.add_input({1.f}); + test_case.add_expected_output(Shape{5}, {9.0f, 9.0f, 15.0f, 21.0f, 30.0f}); test_case.run(); } @@ -3912,9 +3920,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern_axis) { // axis = 1 // scale == {2.0, 3.0, 4.0} // zero point == {10, 20, 30} - test_case.add_input({1.0, 2.0, 3.0, 10.0, 20.0, 30.0, 40.0, 50.0, 100.0}); - test_case.add_expected_output(Shape{3, 3}, {0, 3, 4, 10, 21, 32, 40, 51, 100}); - test_case.add_input({1}); + test_case.add_input({1.0f, 2.0f, 3.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 100.0f}); + test_case.add_expected_output(Shape{3, 3}, {0.f, 3.f, 4.f, 10.f, 21.f, 32.f, 40.f, 51.f, 100.f}); + test_case.add_input({1.f}); test_case.run(); } @@ -3923,8 +3931,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_0D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_0D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({3.141592}); - test_case.add_expected_output({0.0}); + test_case.add_input({3.141592f}); + test_case.add_expected_output({0.0f}); test_case.run(); } @@ -3934,7 +3942,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3944,7 +3952,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3953,10 +3961,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_2D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/logsoftmax13_2D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000, 10001, 10002, 10003}); + test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000.f, 10001.f, 10002.f, 10003.f}); test_case.add_expected_output( Shape{2, 4}, - {-3.4401896, -2.4401896, -1.4401896, -0.44018966, -3.4401896, -2.4401896, -1.4401896, -0.44018966}); + {-3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f, -3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f}); test_case.run_with_tolerance_as_fp(); } @@ -4004,7 +4012,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), 
A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4036,7 +4044,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {3.0f, 6.0f, 9.0f, 12.0f}); @@ -4061,7 +4069,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {3.0f, 8.0f, 15.0f, 12.0f, 20.0f, 30.0f}); @@ -4076,7 +4084,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4107,7 +4115,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {4.0f, 5.0f, 6.0f, 7.0f}); @@ -4133,7 +4141,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, @@ -4164,7 +4172,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {-2.0f, -1.0f, 0.0f, 1.0f}); @@ -4189,7 +4197,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {-2.0f, -2.0f, -2.0f, 1.0f, 1.0f, 1.0f}); @@ -4204,7 +4212,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4237,7 +4245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({2.0f}); test_case.add_expected_output(shape, {0.5f, 1.0f, 1.5f, 2.0f}); @@ -4262,7 +4270,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {0.3333333f, 0.5f, 0.6f, 1.3333333f, 1.25f, 1.2f}); @@ -4299,7 +4307,7 @@ 
NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max) { "onnx/clip_no_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4315,12 +4323,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { auto test_case = test::TestCase(function, s_device); const std::vector data{std::numeric_limits::infinity(), -std::numeric_limits::infinity(), - static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::max()), std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; const std::vector expected_output{std::numeric_limits::max(), std::numeric_limits::lowest(), @@ -4328,13 +4336,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; test_case.add_input(data); test_case.add_expected_output(Shape{2, 4}, expected_output); - test_case.run_with_tolerance_as_fp(0); + test_case.run_with_tolerance_as_fp(0.f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { @@ -4343,9 +4351,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { "onnx/clip_no_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector max_val{2.01}; - const std::vector output{-1.6, -0.1, 2.01, 0., -10., 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector max_val{2.01f}; + const std::vector output{-1.6f, -0.1f, 2.01f, 0.f, -10.f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(max_val); @@ -4360,9 +4368,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max) { "onnx/clip_set_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4408,8 +4416,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max_initializers) { "onnx/clip_set_min_no_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4423,10 +4431,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max) { "onnx/clip_set_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector max_val{2.01}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector max_val{2.01f}; + const std::vector 
output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4442,8 +4450,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max_initializers) { "onnx/clip_set_min_set_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); @@ -4456,16 +4464,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_mvn_v6) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mvn_v6.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.8439683, 0.5665144, 0.05836735, 0.02916367, 0.12964272, 0.5060197, 0.79538304, - 0.9411346, 0.9546573, 0.17730942, 0.46192095, 0.26480448, 0.6746842, 0.01665257, - 0.62473077, 0.9240844, 0.9722341, 0.11965699, 0.41356155, 0.9129373, 0.59330076, - 0.81929934, 0.7862604, 0.11799799, 0.69248444, 0.54119414, 0.07513223}); + test_case.add_input({0.8439683f, 0.5665144f, 0.05836735f, 0.02916367f, 0.12964272f, 0.5060197f, 0.79538304f, + 0.9411346f, 0.9546573f, 0.17730942f, 0.46192095f, 0.26480448f, 0.6746842f, 0.01665257f, + 0.62473077f, 0.9240844f, 0.9722341f, 0.11965699f, 0.41356155f, 0.9129373f, 0.59330076f, + 0.81929934f, 0.7862604f, 0.11799799f, 0.69248444f, 0.54119414f, 0.07513223f}); test_case.add_expected_output( Shape{3, 3, 3, 1}, - {1.3546423, 0.33053496, -1.5450814, -1.2106764, -0.8925952, 0.29888135, 0.38083088, - 0.81808794, 0.85865635, -1.1060555, -0.05552877, -0.78310335, 0.83281356, -1.250282, - 0.67467856, 0.7669372, 0.9113869, -1.6463585, -0.23402764, 1.6092131, 0.42940593, - 1.2906139, 1.1860244, -0.92945826, 0.0721334, -0.38174, -1.7799333}); + {1.3546423f, 0.33053496f, -1.5450814f, -1.2106764f, -0.8925952f, 0.29888135f, 0.38083088f, + 0.81808794f, 0.85865635f, -1.1060555f, -0.05552877f, -0.78310335f, 0.83281356f, -1.250282f, + 0.67467856f, 0.7669372f, 0.9113869f, -1.6463585f, -0.23402764f, 1.6092131f, 0.42940593f, + 1.2906139f, 1.1860244f, -0.92945826f, 0.0721334f, -0.38174f, -1.7799333f}); test_case.run(); } @@ -4578,17 +4586,17 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dropout12_not_const_training_mode) { NGRAPH_TEST(${BACKEND_NAME}, onnx_multiple_slices_last_layer) { std::vector data(1 * 30 * 320 * 320); - std::fill(data.begin(), data.end(), 1); + std::fill(data.begin(), data.end(), 1.f); const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/multiple_slices_last_layer.onnx")); auto test_case = test::TestCase(function, s_device); std::vector o1(1 * 320 * 320 * 21); - std::fill(o1.begin(), o1.end(), 1); + std::fill(o1.begin(), o1.end(), 1.f); std::vector o2(1 * 320 * 320 * 9); - std::fill(o2.begin(), o2.end(), 1); + std::fill(o2.begin(), o2.end(), 1.f); test_case.add_input(data); test_case.add_expected_output(Shape{1, 320, 320, 21}, o1); @@ -4613,23 +4621,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_softmax_crossentropy_loss_mean) { "onnx/softmax_crossentropy_loss_mean.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, 
- 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + test_case.add_input({0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input({1, 4, 3}); - test_case.add_expected_output(Shape{}, {1.561384797096252441}); + test_case.add_expected_output(Shape{}, {1.561384797096252441f}); test_case.run(); } @@ -4640,15 +4648,15 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_negativelog_likelihood_loss) { auto test_case = test::TestCase(function, s_device); test_case.add_input({ - 0.54881352186203, 0.7151893377304077, 0.6027633547782898, 0.5448831915855408, 0.42365479469299316, - 0.6458941102027893, 0.4375872015953064, 0.891772985458374, 0.9636627435684204, 0.3834415078163147, - 0.7917250394821167, 0.5288949012756348, 0.5680445432662964, 0.9255966544151306, 0.07103605568408966, - 0.08712930232286453, 0.020218396559357643, 0.832619845867157, 0.7781567573547363, 0.8700121641159058, - 0.978618323802948, 0.7991585731506348, 0.4614793658256531, 0.7805292010307312, 0.11827442795038223, - 0.6399210095405579, 0.14335328340530396, 0.9446688890457153, 0.5218483209609985, 0.4146619439125061, + 0.54881352186203f, 0.7151893377304077f, 0.6027633547782898f, 0.5448831915855408f, 0.42365479469299316f, + 0.6458941102027893f, 0.4375872015953064f, 0.891772985458374f, 0.9636627435684204f, 0.3834415078163147f, + 0.7917250394821167f, 0.5288949012756348f, 0.5680445432662964f, 0.9255966544151306f, 0.07103605568408966f, + 0.08712930232286453f, 0.020218396559357643f, 0.832619845867157f, 0.7781567573547363f, 0.8700121641159058f, + 0.978618323802948f, 0.7991585731506348f, 0.4614793658256531f, 0.7805292010307312f, 0.11827442795038223f, + 0.6399210095405579f, 0.14335328340530396f, 0.9446688890457153f, 0.5218483209609985f, 0.4146619439125061f, }); test_case.add_input({3, 3, 2, 4, 2, 0}); - test_case.add_expected_output(Shape{}, {-0.531306922435760498}); + test_case.add_expected_output(Shape{}, {-0.531306922435760498f}); test_case.run(); } @@ -4958,19 +4966,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_einsum_sum) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/einsum_sum.onnx")); auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{3, 4}, - {1.764052345967664, - 0.4001572083672233, - 0.9787379841057392, - 2.240893199201458, - 1.8675579901499675, - -0.977277879876411, - 0.9500884175255894, - -0.1513572082976979, - -0.10321885179355784, - 0.41059850193837233, - 0.144043571160878, - 1.454273506962975}); - test_case.add_expected_output(Shape{3}, {5.3838407376420845, 1.689011319501448, 1.9056967282686674}); + {1.764052345967664f, + 0.4001572083672233f, + 0.9787379841057392f, + 2.240893199201458f, + 1.8675579901499675f, + -0.977277879876411f, + 0.9500884175255894f, + -0.1513572082976979f, + -0.10321885179355784f, + 0.41059850193837233f, + 0.144043571160878f, + 1.454273506962975f}); + test_case.add_expected_output(Shape{3}, {5.3838407376420845f, 1.689011319501448f, 1.9056967282686674f}); test_case.run(); } @@ -5074,7 +5082,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, 
"onnx/random_uniform.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294}); + test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f}); test_case.run(); } @@ -5085,7 +5093,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform_like) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{2, 2}, {41, 42, 43, 44}); - test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294}); + test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f}); test_case.run(); } @@ -5094,7 +5102,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/random_normal.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282}); + test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f}); test_case.run(); } @@ -5105,50 +5113,50 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal_like) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{2, 2}, {0, 0, 0, 0}); - test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282}); + test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f}); test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_2in) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_2fin) { const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/aten_embedding_sum_packed_2in.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices - test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8}); + test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f}); test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_3in_offsets_none) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_3fin_offsets_none) { const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/aten_embedding_sum_packed_3in_offset_none.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices - test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8}); + test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f}); test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_per_sample_weights) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4fin_per_sample_weights) { const auto function = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), 
SERIALIZED_ZOO, "onnx/aten_embedding_sum_packed_4in_per_sample_weights.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); - test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices - test_case.add_input(Shape{3, 2}, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}); // per_sample_weights + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); + test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices + test_case.add_input(Shape{3, 2}, {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights - test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, -1., -1.1, -0.09999999, 0.4}); + test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, -1.f, -1.1f, -0.09999999f, 0.4f}); test_case.run(); } @@ -5159,10 +5167,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_two_none) { "onnx/aten_embedding_sum_packed_4in_two_none.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices - test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8}); + test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f}); test_case.run(); } @@ -5173,11 +5181,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_3in) { "onnx/aten_embedding_sum_offset_3in.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices test_case.add_input(Shape{3}, {0, 2, 2}); // offsets - test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8}); + test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f}); test_case.run(); } @@ -5188,12 +5196,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_4in) { "onnx/aten_embedding_sum_offset_4in.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); - test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices - test_case.add_input(Shape{3}, {0, 2, 2}); // offsets - test_case.add_input(Shape{4}, {0.5, 0.5, 0.5, 0.5}); // per_sample_weights + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); + test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices + test_case.add_input(Shape{3}, {0, 2, 2}); // offsets + test_case.add_input(Shape{4}, {0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights - test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, 0., 0., -0.09999999, 0.4}); + test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, 0.f, 0.f, -0.09999999f, 0.4f}); test_case.run(); } @@ -5208,11 +5216,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_many_node_outputs) { EXPECT_EQ(function->get_results().size(), 1); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7}); + test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, 
-1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f}); test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices test_case.add_input(Shape{3}, {0, 2, 2}); // offsets - test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8}); + test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f}); test_case.run(); } @@ -5388,13 +5396,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_rev) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{}, {0}); test_case.add_input(Shape{}, {1}); - test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}); + test_case.add_input(Shape{10}, + std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}); - test_case.add_expected_output(Shape{}, {0.14897026}); - test_case.add_expected_output(Shape{}, {0.}); + test_case.add_expected_output(Shape{}, {0.14897026f}); + test_case.add_expected_output(Shape{}, {0.f}); test_case.add_expected_output( Shape{10}, - {0.9, 1.52, 1.694, 1.9284, 1.8112, 1.4958401, 0.9921121, 0.49759045, 0.14897026, 0.}); + {0.9f, 1.52f, 1.694f, 1.9284f, 1.8112f, 1.4958401f, 0.9921121f, 0.49759045f, 0.14897026f, 0.f}); test_case.run(); } @@ -5407,13 +5416,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_out_rev) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{}, {0}); test_case.add_input(Shape{}, {1}); - test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}); + test_case.add_input(Shape{10}, + std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}); - test_case.add_expected_output(Shape{}, {0.14897026}); + test_case.add_expected_output(Shape{}, {0.14897026f}); test_case.add_expected_output(Shape{}, {0.}); test_case.add_expected_output( Shape{10}, - {0., 0.14897026, 0.49759045, 0.9921121, 1.4958401, 1.8112, 1.9284, 1.694, 1.52, 0.9}); + {0.f, 0.14897026f, 0.49759045f, 0.9921121f, 1.4958401f, 1.8112f, 1.9284f, 1.694f, 1.52f, 0.9f}); test_case.run(); } @@ -5435,27 +5445,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_ones) { test_case.run(); } NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals) { const auto function = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_ND_mixed.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0}); - test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); - std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, - 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.}; + test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); + test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, + 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f, + 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f}; test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output(Shape{1, 3, 2}, - {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443}); + {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 
59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5466,22 +5478,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals_neg_axes) { "onnx/scan15_ND_mixed_neg_axes.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0}); - test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); - std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, - 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.}; + test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); + test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, + 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f, + 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f}; test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output(Shape{1, 3, 2}, - {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443}); + {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5490,22 +5504,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_dyn_rank_vals) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_dyn_rank.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0}); - test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); - std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, - 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.}; + test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); + test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 
0.8f, 0.9f, 1.f, + 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f, + 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f}; test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output(Shape{1, 3, 2}, - {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443}); + {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5529,43 +5545,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_b4_input_rev_vals) { "onnx/scan15_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, - 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4, - 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2, - 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9., - 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, - 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.}; + 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, + 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f, + 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f, + 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f, + 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f, + 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f, + 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f, + 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f}; test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output( Shape{4, 3, 2}, - {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83, - 6.7754016, 6.7239814, 
6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804, - 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421}); + {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, + 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, + 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f}); test_case.add_expected_output( Shape{4, 3, 2}, - {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527, - 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806, - 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, + 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, + 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f}); test_case.add_expected_output( Shape{5, 4, 3, 2}, - {25., 13., 9., 7., 5.8, 5., 1.7741936, 1.75, 1.7272727, 1.7058823, - 1.6857144, 1.6666667, 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 1.2637362, 1.2608696, - 1.2580644, 1.2553192, 1.2526315, 1.25, 70.57143, 35., 23.333334, 17.6, 14.218181, 12., - 3.6739323, 3.618421, 3.5664334, 3.5176468, 3.471777, 3.4285717, 2.822119, 2.8083491, 2.7950313, 2.7821426, - 2.7696643, 2.757576, 2.543786, 2.5377107, 2.5317693, 2.5259573, 2.520271, 2.514706, 95.57143, 47.999996, - 32.333336, 24.6, 20.01818, 17., 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387, - 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 3.8075223, 3.7985802, 3.7898335, 3.7812767, - 3.7729027, 3.764706, 61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804, 5.9097314, 5.896105, - 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421, 6.271278, 6.2461543, 6.2433867, 6.2545457, - 6.2744985, 6.3, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, - 7.324116, 7.3315806, 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 1.7741936f, 1.75f, 1.7272727f, + 1.7058823f, 1.6857144f, 1.6666667f, 1.3934426f, 1.3870969f, 1.3809522f, 1.375f, 1.3692307f, 1.3636364f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, + 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, 2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 5.448126f, 5.368421f, 5.293706f, 5.223529f, 5.157491f, 5.0952387f, 4.215562f, 4.195446f, 4.1759834f, + 4.1571426f, 4.138895f, 4.1212125f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, 6.7754016f, + 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, 5.9097314f, 5.896105f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, + 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 
7.5024257f, 7.506502f}); test_case.run(); } @@ -5600,43 +5619,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan8_ND_b4_input_rev_vals) { "onnx/scan8_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, - 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4, - 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2, - 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9., - 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, - 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.}; + 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, + 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f, + 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f, + 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f, + 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f, + 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f, + 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f, + 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f}; test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output( Shape{4, 3, 2}, - {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804, - 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421}); + {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, + 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, + 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f}); test_case.add_expected_output( Shape{4, 3, 2}, - {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527, - 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806, - 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, + 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, + 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f}); test_case.add_expected_output( Shape{4, 5, 3, 2}, - {25., 13., 9., 7., 5.8, 5., 70.57143, 35., 23.333334, 17.6, - 14.218181, 12., 95.57143, 47.999996, 32.333336, 24.6, 20.01818, 17., 61.210526, 33.2, - 23.857145, 19.181818, 16.373913, 14.5, 6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, - 1.7741936, 1.75, 1.7272727, 1.7058823, 1.6857144, 1.6666667, 3.6739323, 3.618421, 3.5664334, 
3.5176468, - 3.471777, 3.4285717, 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, - 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 2.822119, 2.8083491, 2.7950313, 2.7821426, - 2.7696643, 2.757576, 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 5.9686656, 5.953226, - 5.9382715, 5.9237804, 5.9097314, 5.896105, 7.30868, 7.3164845, 7.324116, 7.3315806, 7.338885, 7.346032, - 1.2637362, 1.2608696, 1.2580644, 1.2553192, 1.2526315, 1.25, 2.543786, 2.5377107, 2.5317693, 2.5259573, - 2.520271, 2.514706, 3.8075223, 3.7985802, 3.7898335, 3.7812767, 3.7729027, 3.764706, 5.652082, 5.645059, - 5.638186, 5.6314588, 5.624872, 5.618421, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 1.7741936f, 1.75f, 1.7272727f, 1.7058823f, 1.6857144f, 1.6666667f, + 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, 5.448126f, 5.368421f, 5.293706f, + 5.223529f, 5.157491f, 5.0952387f, 6.8880844f, 6.83f, 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, + 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, 1.3934426f, 1.3870969f, 1.3809522f, + 1.375f, 1.3692307f, 1.3636364f, 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, + 4.215562f, 4.195446f, 4.1759834f, 4.1571426f, 4.138895f, 4.1212125f, 5.9686656f, 5.953226f, 5.9382715f, + 5.9237804f, 5.9097314f, 5.896105f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 7.5024257f, 7.506502f}); test_case.run(); } @@ -5658,10 +5680,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softsign) { auto model = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softsign.onnx")); - Inputs inputs{std::vector{1.0, 0.1, 20.0, 12.0, -12.0, -0.2, 0.5, 100.0, 0.0, -1.0}}; + Inputs inputs{std::vector{1.0f, 0.1f, 20.0f, 12.0f, -12.0f, -0.2f, 0.5f, 100.0f, 0.0f, -1.0f}}; - std::vector - output{0.5, 0.09090909, 0.95238096, 0.9230769, -0.9230769, -0.16666666, 0.33333334, 0.990099, 0., -0.5}; + std::vector output{0.5f, + 0.09090909f, + 0.95238096f, + 0.9230769f, + -0.9230769f, + -0.16666666f, + 0.33333334f, + 0.990099f, + 0.f, + -0.5f}; auto test_case = test::TestCase(model, s_device); test_case.add_multiple_inputs(inputs); @@ -6215,7 +6245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_uint32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6229,7 +6259,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_int64) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 
4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6287,7 +6317,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_int32_to_float) { test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-1, 2, 3, 4}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); - test_case.add_expected_output(std::vector{-1.0, 2.0, 3.0, 4.0}); + test_case.add_expected_output(std::vector{-1.0f, 2.0f, 3.0f, 4.0f}); test_case.run(); } @@ -6299,7 +6329,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float64_to_int32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543, -2.2, 3.3, 4.4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543f, -2.2f, 3.3f, 4.4f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{-107374, -2, 3, 4}); @@ -6313,9 +6343,13 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_float32_to_bfloat16) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); - test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); + test_case.add_input(Shape{3, 4}, + {1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f, 9.5f, 10.5f, 11.5f, 12.5f}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } @@ -6327,9 +6361,12 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_bfloat16_to_float32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.75, 9, 10, 11, 12}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.75f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index 45e4876998acc5..e231334cfc87e8 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -33,19 +33,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { "onnx/com.microsoft/bias_gelu.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.5488135, - 0.71518934, - 0.60276335, - 0.5448832, - 0.4236548, - 0.6458941, - 0.4375872, - 0.891773, - 0.96366274, - 0.3834415}); - test_case.add_input({0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606}); - test_case.add_expected_output( - {1.2198428, 1.1112978, 1.0293297, 1.366493, 0.3411342, 1.329408, 0.8051748, 1.354462, 1.8336612, 0.3068893}); + test_case.add_input({0.5488135f, + 0.71518934f, + 0.60276335f, + 0.5448832f, + 0.4236548f, + 0.6458941f, + 0.4375872f, + 0.891773f, + 0.96366274f, + 0.3834415f}); + test_case.add_input({0.79172504f, 0.5288949f, 0.56804454f, 0.92559665f, 0.07103606f}); + 
test_case.add_expected_output({1.2198428f, + 1.1112978f, + 1.0293297f, + 1.366493f, + 0.3411342f, + 1.329408f, + 0.8051748f, + 1.354462f, + 1.8336612f, + 0.3068893f}); test_case.run(); } @@ -56,19 +64,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.19721794, -0.42944565, 0.18620640, 0.61282152, -0.11097327, -0.59518522, 0.13393641, 0.66901535, - 0.04256713, -0.71902490, 0.23107991, 0.17300847, -0.04390603, -0.31109563, 0.51021838, -0.66914201, - -0.20009395, -0.43313017, 0.67281967, -0.01712347, 0.09767530, -0.43024653, -0.01836969, -0.29238200, + -0.19721794f, -0.42944565f, 0.18620640f, 0.61282152f, -0.11097327f, -0.59518522f, 0.13393641f, 0.66901535f, + 0.04256713f, -0.71902490f, 0.23107991f, 0.17300847f, -0.04390603f, -0.31109563f, 0.51021838f, -0.66914201f, + -0.20009395f, -0.43313017f, 0.67281967f, -0.01712347f, 0.09767530f, -0.43024653f, -0.01836969f, -0.29238200f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -84,19 +92,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 
0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.17974678, -0.23946194, -0.04376268, 0.46959469, -0.11171167, -0.41859278, -0.11082965, 0.64513868, - 0.07773457, -0.51403606, -0.13661698, 0.11262375, -0.05096011, -0.10416907, 0.10070466, -0.50876135, - -0.22290939, -0.27663514, 0.55416691, -0.08064821, 0.04857478, -0.25121087, -0.15912610, -0.26637587, + -0.17974678f, -0.23946194f, -0.04376268f, 0.46959469f, -0.11171167f, -0.41859278f, -0.11082965f, 0.64513868f, + 0.07773457f, -0.51403606f, -0.13661698f, 0.11262375f, -0.05096011f, -0.10416907f, 0.10070466f, -0.50876135f, + -0.22290939f, -0.27663514f, 0.55416691f, -0.08064821f, 0.04857478f, -0.25121087f, -0.15912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -112,19 +120,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { "onnx/com.microsoft/skip_layer_normalization_with_gamma.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.10974677, 0.16053806, -0.26376268, 0.46959469, -0.04171166, -0.01859277, -0.33082965, 0.64513868, - 0.14773457, -0.11403608, -0.35661697, 0.11262375, 0.01903989, 0.29583094, -0.11929534, -0.50876135, - -0.15290938, 0.12336487, 0.33416691, -0.08064821, 0.11857478, 0.14878914, -0.37912610, -0.26637587, + -0.10974677f, 0.16053806f, -0.26376268f, 0.46959469f, -0.04171166f, -0.01859277f, -0.33082965f, 0.64513868f, + 0.14773457f, -0.11403608f, -0.35661697f, 0.11262375f, 0.01903989f, 0.29583094f, -0.11929534f, -0.50876135f, + -0.15290938f, 0.12336487f, 0.33416691f, -0.08064821f, 0.11857478f, 0.14878914f, -0.37912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -140,37 +148,37 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) "onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 
0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector gamma = { - 0.31542835, - 0.36371076, - 0.57019675, - 0.43860152, + 0.31542835f, + 0.36371076f, + 0.57019675f, + 0.43860152f, }; std::vector beta = { - 0.98837382, - 0.10204481, - 0.20887676, - 0.16130951, + 0.98837382f, + 0.10204481f, + 0.20887676f, + 0.16130951f, }; std::vector bias = { - 0.65310830, - 0.25329161, - 0.46631077, - 0.24442559, + 0.65310830f, + 0.25329161f, + 0.46631077f, + 0.24442559f, }; std::vector expected = { - 0.76600611, 0.34308332, -0.48470584, 0.71335256, 1.10028172, -0.13354334, -0.45232186, 0.79840088, - 1.52454257, -0.19450217, -0.13759643, 0.03988872, 1.27861762, 0.39529073, 0.12247884, -0.52944231, - 0.64228040, 0.21059875, 1.05966032, -0.14278713, 1.46366918, 0.21215858, -0.31640187, -0.22832340, + 0.76600611f, 0.34308332f, -0.48470584f, 0.71335256f, 1.10028172f, -0.13354334f, -0.45232186f, 0.79840088f, + 1.52454257f, -0.19450217f, -0.13759643f, 0.03988872f, 1.27861762f, 0.39529073f, 0.12247884f, -0.52944231f, + 0.64228040f, 0.21059875f, 1.05966032f, -0.14278713f, 1.46366918f, 0.21215858f, -0.31640187f, -0.22832340f, }; auto test_case = test::TestCase(function, s_device); @@ -193,21 +201,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, }; std::vector expected_output = { - -0.06615843, -0.18040463, 0.02199928, 0.01868065, 0.05397778, -0.11761580, -0.09138932, -0.02506775, - -0.02368510, -0.10373901, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.08906764, - 0.09692993, -0.04444099, -0.02037602, -0.03453060, -0.10214549, -0.13331436, -0.02665862, -0.01228805, - -0.14232540, -0.07032782, 0.05511986, -0.00120272, -0.04875736, -0.13051267, -0.05709254, 0.17854357, - -0.01759873, -0.01819968, 0.07573269, 0.00557164, 0.06232717, 0.00530490, -0.01565807, -0.14841977, - -0.02299280, 0.02038561, -0.00049481, 0.02575402, 0.10081697, -0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.06176658, - 0.08304203, -0.05025182, 0.00383657, -0.02288112, -0.11407227, -0.01386134, -0.04411830, -0.00537948, - 0.00164397, -0.03739140, 0.09941526, 0.00333974, -0.04251949, -0.12992151, -0.09509478, -0.11811313, - -0.03307065, -0.00866115, -0.15162414, 0.01106802, 0.06037656, 0.00035292, -0.00223284, -0.11215645, - -0.01390734, 0.07064321, 0.04028325, -0.00290875, 0.12875907, 
-0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.08723789, 0.03130914, 0.03131931, -0.01526242, 0.20811458, -0.05696163, - 0.16304255, -0.02407495, -0.02955675, -0.03086288, -0.08130091, -0.05001551, -0.04875683, 0.00143666, - -0.12153473, -0.00018507, 0.10957482, -0.00416618, -0.01612359, -0.11605026, -0.08593204, 0.09055272, - -0.03054028, -0.03603891, -0.08479506, -0.00034568, 0.03713699, 0.00163411, -0.01738501, -0.18267182, + -0.06615843f, -0.18040463f, 0.02199928f, 0.01868065f, 0.05397778f, -0.11761580f, -0.09138932f, -0.02506775f, + -0.02368510f, -0.10373901f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.08906764f, + 0.09692993f, -0.04444099f, -0.02037602f, -0.03453060f, -0.10214549f, -0.13331436f, -0.02665862f, -0.01228805f, + -0.14232540f, -0.07032782f, 0.05511986f, -0.00120272f, -0.04875736f, -0.13051267f, -0.05709254f, 0.17854357f, + -0.01759873f, -0.01819968f, 0.07573269f, 0.00557164f, 0.06232717f, 0.00530490f, -0.01565807f, -0.14841977f, + -0.02299280f, 0.02038561f, -0.00049481f, 0.02575402f, 0.10081697f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.06176658f, + 0.08304203f, -0.05025182f, 0.00383657f, -0.02288112f, -0.11407227f, -0.01386134f, -0.04411830f, -0.00537948f, + 0.00164397f, -0.03739140f, 0.09941526f, 0.00333974f, -0.04251949f, -0.12992151f, -0.09509478f, -0.11811313f, + -0.03307065f, -0.00866115f, -0.15162414f, 0.01106802f, 0.06037656f, 0.00035292f, -0.00223284f, -0.11215645f, + -0.01390734f, 0.07064321f, 0.04028325f, -0.00290875f, 0.12875907f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.08723789f, 0.03130914f, 0.03131931f, -0.01526242f, 0.20811458f, -0.05696163f, + 0.16304255f, -0.02407495f, -0.02955675f, -0.03086288f, -0.08130091f, -0.05001551f, -0.04875683f, 0.00143666f, + -0.12153473f, -0.00018507f, 0.10957482f, -0.00416618f, -0.01612359f, -0.11605026f, -0.08593204f, 0.09055272f, + -0.03054028f, -0.03603891f, -0.08479506f, -0.00034568f, 0.03713699f, 0.00163411f, -0.01738501f, -0.18267182f, }; auto test_case = test::TestCase(function, s_device); @@ -229,21 +237,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, 
-0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, -0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { @@ -257,7 +265,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(segment_ids); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { @@ -276,21 +284,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, 
-0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, -0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { 5, @@ -304,7 +312,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(mask); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { @@ -320,70 +328,71 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 
0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, - 0.70373726, 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, - 0.68005556, 0.44919774, 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, - 0.31321833, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, - 0.47486752, 0.47013220, 0.71607453, 0.28799102, 0.38346222, 0.74916983, 0.87845218, 0.10286336, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 
0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -402,7 +411,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_pos_embed_len) { @@ -418,70 +427,72 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 
0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, 0.70373726, - 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.96541619, 0.58846509, - 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.28799102, - 0.38346222, 0.74916983, 0.87845218, 0.10286336, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 
0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -500,7 +511,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ids) { @@ -516,50 +527,52 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 
0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.43328807, 0.75610667, 0.39609829, 0.89603841, - 0.63892108, 0.36980811, 0.14644176, 0.56961840, 0.70373726, 0.28847644, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.28799102, 0.38346222, - 0.74916983, 0.87845218, 0.10286336, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, - 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.43328807f, 0.75610667f, 0.39609829f, + 0.89603841f, 0.63892108f, 0.36980811f, 0.14644176f, 0.56961840f, 0.70373726f, 0.28847644f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, 0.96541619f, 0.58846509f, + 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, 0.47486752f, 0.47013220f, 0.71607453f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, @@ -568,21 +581,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 1, 3, 4, 6, 7, 5, 8, 2, 1, 3, 4, 6, 7, 5, 0, 2, 1, 3, 4, 6, 7, 5, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 
0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -602,7 +615,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ test_case.add_input(Shape{3, 8}, position_ids); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { @@ -612,20 +625,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.91475844, 0.91523546, 0.82536930, 0.37491974, 0.22384071, 0.05941105, 0.01902100, 0.70131350, - 0.09603709, 0.44200060, 0.53106076, 0.79464376, 0.35469049, 
0.25225943, 0.25179818, 0.29592562, - 0.24836586, 0.65088797, 0.93126643, 0.67980725, 0.85708112, 0.59808528, 0.46321425, 0.19301885, + 0.91475844f, 0.91523546f, 0.82536930f, 0.37491974f, 0.22384071f, 0.05941105f, 0.01902100f, 0.70131350f, + 0.09603709f, 0.44200060f, 0.53106076f, 0.79464376f, 0.35469049f, 0.25225943f, 0.25179818f, 0.29592562f, + 0.24836586f, 0.65088797f, 0.93126643f, 0.67980725f, 0.85708112f, 0.59808528f, 0.46321425f, 0.19301885f, }; std::vector output = { - 0.07966283, 0.10783536, -0.19424979, 0.54514766, 0.07965867, 0.10783093, -0.19424866, 0.54510003, - 0.07965846, 0.10783067, -0.19424550, 0.54509139, 0.07966217, 0.10783640, -0.19424903, 0.54512268, - 0.06940663, 0.10962760, -0.19698445, 0.53492010, 0.06940675, 0.10962828, -0.19698484, 0.53492326, - 0.06940714, 0.10963022, -0.19698712, 0.53494006, 0.06940673, 0.10962812, -0.19698519, 0.53492481, + 0.07966283f, 0.10783536f, -0.19424979f, 0.54514766f, 0.07965867f, 0.10783093f, -0.19424866f, 0.54510003f, + 0.07965846f, 0.10783067f, -0.19424550f, 0.54509139f, 0.07966217f, 0.10783640f, -0.19424903f, 0.54512268f, + 0.06940663f, 0.10962760f, -0.19698445f, 0.53492010f, 0.06940675f, 0.10962828f, -0.19698484f, 0.53492326f, + 0.06940714f, 0.10963022f, -0.19698712f, 0.53494006f, 0.06940673f, 0.10962812f, -0.19698519f, 0.53492481f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { @@ -636,24 +649,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.56477863, 0.60309958, 0.35158035, 0.03123519, 0.81918180, 0.76905495, 0.47219241, 0.72016627, - 0.59377003, 0.91380632, 0.56797302, 0.34846428, 0.83839595, 0.16394103, 0.34676281, 0.09161621, - 0.45562279, 0.23317528, 0.37197968, 0.06727808, 0.08500192, 0.84915495, 0.68266946, 0.00227691, + 0.56477863f, 0.60309958f, 0.35158035f, 0.03123519f, 0.81918180f, 0.76905495f, 0.47219241f, 0.72016627f, + 0.59377003f, 0.91380632f, 0.56797302f, 0.34846428f, 0.83839595f, 0.16394103f, 0.34676281f, 0.09161621f, + 0.45562279f, 0.23317528f, 0.37197968f, 0.06727808f, 0.08500192f, 0.84915495f, 0.68266946f, 0.00227691f, }; std::vector output = { - -0.59370947, -0.30300471, 0.12048547, -0.09029539, 0.08041390, 0.10250041, -0.19381392, 0.55126983, - -0.59370828, -0.30301332, 0.12049319, -0.09029691, 0.08041921, 0.10250521, -0.19381438, 0.55127531, - -0.59370869, -0.30301058, 0.12049074, -0.09029643, 0.08041564, 0.10250199, -0.19381410, 0.55127168, - -0.59370929, -0.30300608, 0.12048667, -0.09029562, 0.08041184, 0.10249855, -0.19381374, 0.55126774, - -0.59681994, -0.26327702, 0.07638434, -0.06311120, 0.06671587, 0.10916986, -0.19412412, 0.51977092, - -0.59682053, -0.26328400, 0.07638102, -0.06311222, 0.06671817, 0.10917170, -0.19412397, 0.51977223, - -0.59682077, -0.26328647, 0.07637984, -0.06311259, 0.06671739, 0.10917108, -0.19412403, 0.51977175, - -0.59682101, -0.26328778, 0.07637922, -0.06311278, 0.06671065, 0.10916568, -0.19412443, 0.51976782, + -0.59370947f, -0.30300471f, 0.12048547f, -0.09029539f, 0.08041390f, 0.10250041f, -0.19381392f, 0.55126983f, + -0.59370828f, -0.30301332f, 0.12049319f, -0.09029691f, 0.08041921f, 0.10250521f, -0.19381438f, 0.55127531f, + -0.59370869f, -0.30301058f, 0.12049074f, -0.09029643f, 0.08041564f, 0.10250199f, -0.19381410f, 0.55127168f, + -0.59370929f, -0.30300608f, 0.12048667f, -0.09029562f, 
0.08041184f, 0.10249855f, -0.19381374f, 0.55126774f, + -0.59681994f, -0.26327702f, 0.07638434f, -0.06311120f, 0.06671587f, 0.10916986f, -0.19412412f, 0.51977092f, + -0.59682053f, -0.26328400f, 0.07638102f, -0.06311222f, 0.06671817f, 0.10917170f, -0.19412397f, 0.51977223f, + -0.59682077f, -0.26328647f, 0.07637984f, -0.06311259f, 0.06671739f, 0.10917108f, -0.19412403f, 0.51977175f, + -0.59682101f, -0.26328778f, 0.07637922f, -0.06311278f, 0.06671065f, 0.10916568f, -0.19412443f, 0.51976782f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-4); + test_case.run_with_tolerance_as_fp(1e-4f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { @@ -664,34 +677,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.89578921, 0.42421508, 0.35630688, 0.77461642, 0.65753633, 0.09723099, 0.62597734, 0.72117692, - 0.57636845, 0.17104276, 0.13245547, 0.59879875, 0.15624641, 0.44903454, 0.50483286, 0.92975074, - 0.36934483, 0.29919949, 0.57185954, 0.83036488, 0.08384345, 0.20378476, 0.74684393, 0.46716982, + 0.89578921f, 0.42421508f, 0.35630688f, 0.77461642f, 0.65753633f, 0.09723099f, 0.62597734f, 0.72117692f, + 0.57636845f, 0.17104276f, 0.13245547f, 0.59879875f, 0.15624641f, 0.44903454f, 0.50483286f, 0.92975074f, + 0.36934483f, 0.29919949f, 0.57185954f, 0.83036488f, 0.08384345f, 0.20378476f, 0.74684393f, 0.46716982f, }; std::vector output = { - 0.05604819, 0.09000472, -0.19437021, 0.52487367, 0.06211422, 0.08740954, -0.19139624, 0.52762908, - 0.06708897, 0.08992603, -0.19214047, 0.53631783, 0.06896879, 0.10248676, -0.19485690, 0.53477794, - 0.08577005, 0.12807365, -0.19762954, 0.54432857, 0.06929274, 0.10893210, -0.19599904, 0.53184807, - 0.07348281, 0.10215081, -0.19280069, 0.53552240, 0.07861833, 0.10517240, -0.19285706, 0.54126489, + 0.05604819f, 0.09000472f, -0.19437021f, 0.52487367f, 0.06211422f, 0.08740954f, -0.19139624f, 0.52762908f, + 0.06708897f, 0.08992603f, -0.19214047f, 0.53631783f, 0.06896879f, 0.10248676f, -0.19485690f, 0.53477794f, + 0.08577005f, 0.12807365f, -0.19762954f, 0.54432857f, 0.06929274f, 0.10893210f, -0.19599904f, 0.53184807f, + 0.07348281f, 0.10215081f, -0.19280069f, 0.53552240f, 0.07861833f, 0.10517240f, -0.19285706f, 0.54126489f, }; std::vector present = { - -0.60427380, -0.25958878, -0.59609234, -0.24055196, -0.59613681, -0.30088067, -0.59633607, -0.33270463, - 0.06899665, -0.09284544, 0.08059876, -0.06146053, 0.11841078, -0.10019838, 0.10605468, -0.09273906, - -0.59036821, -0.32410735, -0.60532302, -0.25127757, -0.58926487, -0.25271094, -0.58640373, -0.31730092, - 0.12509561, -0.07968873, 0.06005794, -0.08937149, 0.10523240, -0.05083811, 0.14162725, -0.07438751, - 0.05604819, 0.09000472, 0.06819826, 0.08480665, 0.07700446, 0.09494394, 0.07459175, 0.14003153, - -0.19437021, 0.52487367, -0.18843602, 0.53037173, -0.19362189, 0.55360907, -0.20299932, 0.53020388, - 0.08577005, 0.12807365, 0.05276009, 0.08972625, 0.08190014, 0.08852972, 0.09400313, 0.11423884, - -0.19762954, 0.54432857, -0.19435294, 0.51924801, -0.18643703, 0.54280555, -0.19302703, 0.55837619, + -0.60427380f, -0.25958878f, -0.59609234f, -0.24055196f, -0.59613681f, -0.30088067f, -0.59633607f, -0.33270463f, + 0.06899665f, -0.09284544f, 0.08059876f, -0.06146053f, 0.11841078f, -0.10019838f, 0.10605468f, -0.09273906f, + -0.59036821f, -0.32410735f, -0.60532302f, -0.25127757f, -0.58926487f, -0.25271094f, -0.58640373f, -0.31730092f, + 0.12509561f, 
-0.07968873f, 0.06005794f, -0.08937149f, 0.10523240f, -0.05083811f, 0.14162725f, -0.07438751f, + 0.05604819f, 0.09000472f, 0.06819826f, 0.08480665f, 0.07700446f, 0.09494394f, 0.07459175f, 0.14003153f, + -0.19437021f, 0.52487367f, -0.18843602f, 0.53037173f, -0.19362189f, 0.55360907f, -0.20299932f, 0.53020388f, + 0.08577005f, 0.12807365f, 0.05276009f, 0.08972625f, 0.08190014f, 0.08852972f, 0.09400313f, 0.11423884f, + -0.19762954f, 0.54432857f, -0.19435294f, 0.51924801f, -0.18643703f, 0.54280555f, -0.19302703f, 0.55837619f, }; test_case.add_input(input); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1f) { const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, @@ -699,29 +712,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.02841483, 0.47845092, 0.14633700, 0.54597300, 0.40160629, 0.55281311, 0.14931096, 0.64483738, - 0.96559167, 0.05262021, 0.12391864, 0.20093553, 0.74290562, 0.19367455, 0.19253619, 0.41593507, - 0.91188699, 0.61606920, 0.72673517, 0.86981291, 0.19963337, 0.22747350, 0.34308898, 0.57267183, + 0.02841483f, 0.47845092f, 0.14633700f, 0.54597300f, 0.40160629f, 0.55281311f, 0.14931096f, 0.64483738f, + 0.96559167f, 0.05262021f, 0.12391864f, 0.20093553f, 0.74290562f, 0.19367455f, 0.19253619f, 0.41593507f, + 0.91188699f, 0.61606920f, 0.72673517f, 0.86981291f, 0.19963337f, 0.22747350f, 0.34308898f, 0.57267183f, }; std::vector mask_index = { 0, 1, }; std::vector output = { - 0.08298690, 0.12711772, -0.19757506, 0.54029012, 0.08298548, 0.12711433, -0.19757731, 0.54031140, - 0.08298430, 0.12711799, -0.19757695, 0.54031777, 0.08298548, 0.12711433, -0.19757444, 0.54028159, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + 0.08298690f, 0.12711772f, -0.19757506f, 0.54029012f, 0.08298548f, 0.12711433f, -0.19757731f, 0.54031140f, + 0.08298430f, 0.12711799f, -0.19757695f, 0.54031777f, 0.08298548f, 0.12711433f, -0.19757444f, 0.54028159f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, }; std::vector present = { - -0.58437425, -0.29483819, -0.59927911, -0.30336475, -0.59104657, -0.37327260, -0.59078789, -0.29863101, - 0.11751597, -0.04114649, 0.09933343, -0.09884726, 0.16250694, -0.12028439, 0.09319257, -0.05129660, - -0.60341775, -0.25221461, -0.58933026, -0.31912822, -0.59271193, -0.25470981, -0.59399152, -0.32643768, - 0.05398282, -0.07468132, 0.14743008, -0.09407346, 0.10399222, -0.06682440, 0.11632499, -0.08986320, - 0.09104910, 0.12973849, 0.06917210, 0.11059431, 0.09356256, 0.12594685, 0.07814129, 0.14221822, - -0.19329809, 0.53526556, -0.19787431, 0.53673857, -0.20045389, 0.57165766, -0.19869246, 0.51749766, - 0.05380550, 0.10459180, 0.09169570, 0.09892380, 0.07746917, 0.08042616, 0.07953370, 0.12909687, - -0.19593412, 0.50907606, -0.19202785, 0.56904894, -0.18689045, 0.54643762, -0.19969353, 0.53976399, + -0.58437425f, -0.29483819f, -0.59927911f, 
-0.30336475f, -0.59104657f, -0.37327260f, -0.59078789f, -0.29863101f, + 0.11751597f, -0.04114649f, 0.09933343f, -0.09884726f, 0.16250694f, -0.12028439f, 0.09319257f, -0.05129660f, + -0.60341775f, -0.25221461f, -0.58933026f, -0.31912822f, -0.59271193f, -0.25470981f, -0.59399152f, -0.32643768f, + 0.05398282f, -0.07468132f, 0.14743008f, -0.09407346f, 0.10399222f, -0.06682440f, 0.11632499f, -0.08986320f, + 0.09104910f, 0.12973849f, 0.06917210f, 0.11059431f, 0.09356256f, 0.12594685f, 0.07814129f, 0.14221822f, + -0.19329809f, 0.53526556f, -0.19787431f, 0.53673857f, -0.20045389f, 0.57165766f, -0.19869246f, 0.51749766f, + 0.05380550f, 0.10459180f, 0.09169570f, 0.09892380f, 0.07746917f, 0.08042616f, 0.07953370f, 0.12909687f, + -0.19593412f, 0.50907606f, -0.19202785f, 0.56904894f, -0.18689045f, 0.54643762f, -0.19969353f, 0.53976399f, }; test_case.add_input(input); @@ -739,9 +752,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.75259578, 0.81492645, 0.46713001, 0.29483622, 0.06768602, 0.95105755, 0.32065326, 0.52417183, - 0.73136383, 0.77176476, 0.60997742, 0.64625764, 0.16311000, 0.89680773, 0.01331447, 0.42468646, - 0.58711547, 0.00345124, 0.13053808, 0.46278623, 0.13786320, 0.65182054, 0.74864876, 0.81506181, + 0.75259578f, 0.81492645f, 0.46713001f, 0.29483622f, 0.06768602f, 0.95105755f, 0.32065326f, 0.52417183f, + 0.73136383f, 0.77176476f, 0.60997742f, 0.64625764f, 0.16311000f, 0.89680773f, 0.01331447f, 0.42468646f, + 0.58711547f, 0.00345124f, 0.13053808f, 0.46278623f, 0.13786320f, 0.65182054f, 0.74864876f, 0.81506181f, }; std::vector mask_index = { 3, @@ -750,20 +763,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { 1, }; std::vector output = { - 0.07524174, 0.11320241, -0.19909523, 0.54785377, 0.06825337, 0.13981669, -0.20774621, 0.53718704, - 0.07531278, 0.12957911, -0.20330518, 0.54547405, 0.07531209, 0.12958010, -0.20330583, 0.54547292, - 0.08900890, 0.11150353, -0.18931937, 0.53757656, 0.07915881, 0.10416336, -0.18914750, 0.52921104, - 0.08285815, 0.11462159, -0.19115375, 0.53077918, 0.08285838, 0.11462225, -0.19115454, 0.53077984, + 0.07524174f, 0.11320241f, -0.19909523f, 0.54785377f, 0.06825337f, 0.13981669f, -0.20774621f, 0.53718704f, + 0.07531278f, 0.12957911f, -0.20330518f, 0.54547405f, 0.07531209f, 0.12958010f, -0.20330583f, 0.54547292f, + 0.08900890f, 0.11150353f, -0.18931937f, 0.53757656f, 0.07915881f, 0.10416336f, -0.18914750f, 0.52921104f, + 0.08285815f, 0.11462159f, -0.19115375f, 0.53077918f, 0.08285838f, 0.11462225f, -0.19115454f, 0.53077984f, }; std::vector present = { - -0.59630549, -0.28110915, -0.60274345, -0.36154836, -0.59437746, -0.33717164, -0.60134649, -0.29849592, - 0.11169122, -0.09345293, 0.11103803, -0.13096604, 0.13131849, -0.10597084, 0.10463209, -0.11332577, - -0.57949269, -0.27235535, -0.58941406, -0.25372508, -0.58658379, -0.28718373, -0.59821802, -0.32433146, - 0.13244939, -0.02865628, 0.09308393, -0.04083736, 0.10948701, -0.04423397, 0.13060363, -0.12316251, - 0.07509718, 0.08392500, 0.06825337, 0.13981669, 0.08239168, 0.11931328, 0.06770951, 0.09240761, - -0.19074154, 0.55260652, -0.20774621, 0.53718704, -0.19888818, 0.55371630, -0.19559640, 0.54754448, - 0.09983939, 0.10603377, 0.07915881, 0.10416336, 0.08655046, 0.12505992, 0.07738422, 0.09509270, - -0.18571433, 0.55095005, -0.18914750, 0.52921104, -0.19315663, 0.53234470, -0.19601485, 0.56322992, + -0.59630549f, -0.28110915f, -0.60274345f, -0.36154836f, -0.59437746f, -0.33717164f, 
-0.60134649f, -0.29849592f, + 0.11169122f, -0.09345293f, 0.11103803f, -0.13096604f, 0.13131849f, -0.10597084f, 0.10463209f, -0.11332577f, + -0.57949269f, -0.27235535f, -0.58941406f, -0.25372508f, -0.58658379f, -0.28718373f, -0.59821802f, -0.32433146f, + 0.13244939f, -0.02865628f, 0.09308393f, -0.04083736f, 0.10948701f, -0.04423397f, 0.13060363f, -0.12316251f, + 0.07509718f, 0.08392500f, 0.06825337f, 0.13981669f, 0.08239168f, 0.11931328f, 0.06770951f, 0.09240761f, + -0.19074154f, 0.55260652f, -0.20774621f, 0.53718704f, -0.19888818f, 0.55371630f, -0.19559640f, 0.54754448f, + 0.09983939f, 0.10603377f, 0.07915881f, 0.10416336f, 0.08655046f, 0.12505992f, 0.07738422f, 0.09509270f, + -0.18571433f, 0.55095005f, -0.18914750f, 0.52921104f, -0.19315663f, 0.53234470f, -0.19601485f, 0.56322992f, }; test_case.add_input(input); @@ -781,9 +794,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.33093750, 0.39181390, 0.14586255, 0.39709702, 0.98086524, 0.03891133, 0.72234219, 0.21966648, - 0.79986620, 0.97251678, 0.04131543, 0.43971965, 0.50185394, 0.11452501, 0.88111717, 0.76076663, - 0.31870860, 0.54107893, 0.91756296, 0.58112669, 0.99117357, 0.00256292, 0.58885485, 0.93481058, + 0.33093750f, 0.39181390f, 0.14586255f, 0.39709702f, 0.98086524f, 0.03891133f, 0.72234219f, 0.21966648f, + 0.79986620f, 0.97251678f, 0.04131543f, 0.43971965f, 0.50185394f, 0.11452501f, 0.88111717f, 0.76076663f, + 0.31870860f, 0.54107893f, 0.91756296f, 0.58112669f, 0.99117357f, 0.00256292f, 0.58885485f, 0.93481058f, }; std::vector mask = { 1, @@ -796,27 +809,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { 1, }; std::vector output = { - 0.07551830, 0.10666487, -0.19357042, 0.53683108, 0.07551410, 0.10666656, -0.19356072, 0.53684169, - 0.07552745, 0.10666100, -0.19358172, 0.53682435, 0.07552218, 0.10666317, -0.19358677, 0.53681952, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + 0.07551830f, 0.10666487f, -0.19357042f, 0.53683108f, 0.07551410f, 0.10666656f, -0.19356072f, 0.53684169f, + 0.07552745f, 0.10666100f, -0.19358172f, 0.53682435f, 0.07552218f, 0.10666317f, -0.19358677f, 0.53681952f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, }; std::vector present = { - -0.59174627, -0.27471560, -0.58307797, -0.25967693, -0.60766846, -0.31754097, -0.61241394, -0.26291698, - 0.09206123, -0.05307099, 0.12491645, -0.03853742, 0.08732655, -0.13050151, 0.04073093, -0.10792807, - -0.60556883, -0.34055573, -0.60474855, -0.28785610, -0.60757709, -0.32514900, -0.58872569, -0.37967020, - 0.09779400, -0.13136166, 0.07915612, -0.10649752, 0.11043755, -0.15124020, 0.16626491, -0.11274654, - 0.07639833, 0.11762549, 0.09370039, 0.09133558, 0.05661478, 0.11096847, 0.04019671, 0.10117501, - -0.19371650, 0.52530587, -0.18429738, 0.55240726, -0.20283231, 0.53265429, -0.20036045, 0.50568837, - 0.06171235, 0.12687264, 0.05802051, 0.10266830, 0.06172965, 0.08967118, 0.09727416, 0.13513327, - -0.20576829, 0.53365225, -0.19832623, 0.52809310, -0.19971462, 0.55584043, -0.20121223, 0.57003713, + -0.59174627f, -0.27471560f, -0.58307797f, -0.25967693f, -0.60766846f, -0.31754097f, -0.61241394f, -0.26291698f, + 0.09206123f, 
-0.05307099f, 0.12491645f, -0.03853742f, 0.08732655f, -0.13050151f, 0.04073093f, -0.10792807f, + -0.60556883f, -0.34055573f, -0.60474855f, -0.28785610f, -0.60757709f, -0.32514900f, -0.58872569f, -0.37967020f, + 0.09779400f, -0.13136166f, 0.07915612f, -0.10649752f, 0.11043755f, -0.15124020f, 0.16626491f, -0.11274654f, + 0.07639833f, 0.11762549f, 0.09370039f, 0.09133558f, 0.05661478f, 0.11096847f, 0.04019671f, 0.10117501f, + -0.19371650f, 0.52530587f, -0.18429738f, 0.55240726f, -0.20283231f, 0.53265429f, -0.20036045f, 0.50568837f, + 0.06171235f, 0.12687264f, 0.05802051f, 0.10266830f, 0.06172965f, 0.08967118f, 0.09727416f, 0.13513327f, + -0.20576829f, 0.53365225f, -0.19832623f, 0.52809310f, -0.19971462f, 0.55584043f, -0.20121223f, 0.57003713f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { @@ -827,35 +840,35 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.23565151, 0.58627969, 0.75137484, 0.68586946, 0.62750375, 0.13284931, 0.13347220, 0.36357051, - 0.56910241, 0.48275986, 0.49440190, 0.45483324, 0.63547862, 0.97893149, 0.40630588, 0.38783622, - 0.07172249, 0.46385381, 0.99764502, 0.22219376, 0.67735291, 0.40799847, 0.74337566, 0.87263006, + 0.23565151f, 0.58627969f, 0.75137484f, 0.68586946f, 0.62750375f, 0.13284931f, 0.13347220f, 0.36357051f, + 0.56910241f, 0.48275986f, 0.49440190f, 0.45483324f, 0.63547862f, 0.97893149f, 0.40630588f, 0.38783622f, + 0.07172249f, 0.46385381f, 0.99764502f, 0.22219376f, 0.67735291f, 0.40799847f, 0.74337566f, 0.87263006f, }; std::vector mask = { 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, }; std::vector output = { - 0.07771622, 0.10724538, -0.19453585, 0.54342043, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.07927690, 0.10619678, -0.19399606, 0.54543519, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.05485561, 0.11278091, -0.20117569, 0.52096349, 0.06629646, 0.10195158, -0.19900991, 0.54654449, - 0.06491723, 0.10292297, -0.19678673, 0.53451663, 0.06549793, 0.11126325, -0.19989857, 0.53717279, + 0.07771622f, 0.10724538f, -0.19453585f, 0.54342043f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.07927690f, 0.10619678f, -0.19399606f, 0.54543519f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.05485561f, 0.11278091f, -0.20117569f, 0.52096349f, 0.06629646f, 0.10195158f, -0.19900991f, 0.54654449f, + 0.06491723f, 0.10292297f, -0.19678673f, 0.53451663f, 0.06549793f, 0.11126325f, -0.19989857f, 0.53717279f, }; std::vector present = { - -0.59188855, -0.34495637, -0.59508181, -0.25013468, -0.59176934, -0.33229247, -0.59576762, -0.29731843, - 0.14217430, -0.10403840, 0.08584045, -0.06193545, 0.12358667, -0.08588549, 0.10515238, -0.08629489, - -0.59092808, -0.28260738, -0.60047609, -0.30411413, -0.61210287, -0.28645760, -0.59391296, -0.34649473, - 0.12789863, -0.08159252, 0.08122411, -0.08866425, 0.06395009, -0.12896645, 0.14855847, -0.11978809, - 0.08783118, 0.12152332, 0.07067389, 0.09078297, 0.08385989, 0.13306075, 0.07459468, 0.10934003, - -0.19849420, 0.55928540, -0.18948570, 0.53154731, -0.19960676, 0.54237455, -0.19561143, 0.53936625, - 0.08509844, 0.08314656, 0.06388859, 0.12990499, 0.04582624, 0.09566365, 0.08674107, 0.10823163, - -0.18808734, 
0.56137776, -0.20168513, 0.51830697, -0.20066255, 0.52363914, -0.19737384, 0.56921995, + -0.59188855f, -0.34495637f, -0.59508181f, -0.25013468f, -0.59176934f, -0.33229247f, -0.59576762f, -0.29731843f, + 0.14217430f, -0.10403840f, 0.08584045f, -0.06193545f, 0.12358667f, -0.08588549f, 0.10515238f, -0.08629489f, + -0.59092808f, -0.28260738f, -0.60047609f, -0.30411413f, -0.61210287f, -0.28645760f, -0.59391296f, -0.34649473f, + 0.12789863f, -0.08159252f, 0.08122411f, -0.08866425f, 0.06395009f, -0.12896645f, 0.14855847f, -0.11978809f, + 0.08783118f, 0.12152332f, 0.07067389f, 0.09078297f, 0.08385989f, 0.13306075f, 0.07459468f, 0.10934003f, + -0.19849420f, 0.55928540f, -0.18948570f, 0.53154731f, -0.19960676f, 0.54237455f, -0.19561143f, 0.53936625f, + 0.08509844f, 0.08314656f, 0.06388859f, 0.12990499f, 0.04582624f, 0.09566365f, 0.08674107f, 0.10823163f, + -0.18808734f, 0.56137776f, -0.20168513f, 0.51830697f, -0.20066255f, 0.52363914f, -0.19737384f, 0.56921995f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { @@ -866,9 +879,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.82966000, 0.77751911, 0.08977074, 0.06076468, 0.40659550, 0.19995944, 0.55544919, 0.83971608, - 0.86254036, 0.30894691, 0.80156928, 0.83092463, 0.14506543, 0.32196075, 0.42209163, 0.24465553, - 0.93944097, 0.73528159, 0.23347616, 0.60544974, 0.93329269, 0.67604774, 0.56349903, 0.26199624, + 0.82966000f, 0.77751911f, 0.08977074f, 0.06076468f, 0.40659550f, 0.19995944f, 0.55544919f, 0.83971608f, + 0.86254036f, 0.30894691f, 0.80156928f, 0.83092463f, 0.14506543f, 0.32196075f, 0.42209163f, 0.24465553f, + 0.93944097f, 0.73528159f, 0.23347616f, 0.60544974f, 0.93329269f, 0.67604774f, 0.56349903f, 0.26199624f, }; std::vector mask = { 1, @@ -891,41 +904,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { 1, }; std::vector past = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, 0.73566031, - 0.75168055, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, 0.88056499, 0.08436447, - 0.54744655, 0.25466520, 0.08500137, 0.19271941, 0.86525357, 0.21717627, 0.97158766, 0.42288730, 0.09890039, - 0.01148765, 0.97024685, 0.19697112, 0.67671591, 0.67960924, 0.46656516, 0.30850092, 0.73536104, 0.73938161, - 0.91650903, 0.57628596, 0.51164514, 0.11695814, 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, - 0.90233624, 0.84986305, 0.26141909, 0.84528726, 0.81416380, 0.00429944, 0.31476986, 0.00440918, 0.77413058, - 0.13409913, 0.20965169, 0.61764991, 0.55266041, 0.56107825, 0.42051074, 0.16804738, 0.80362344, 0.52392679, - 0.27550557, 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, 0.67671591f, 0.67960924f, 
+ 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, }; std::vector output = { - 0.26186451, 0.45950246, -0.04001215, 0.47680017, 0.26333901, 0.46158865, -0.04006424, 0.47588652, - 0.26875457, 0.47031689, -0.03951600, 0.47674999, 0.26851410, 0.46987134, -0.03919901, 0.47629333, - 0.18083976, 0.16579385, -0.05161894, 0.63075018, 0.18228555, 0.16642828, -0.04873618, 0.63316816, - 0.18362364, 0.16702136, -0.05045432, 0.63178891, 0.18000112, 0.16541445, -0.05139139, 0.63105792, + 0.26186451f, 0.45950246f, -0.04001215f, 0.47680017f, 0.26333901f, 0.46158865f, -0.04006424f, 0.47588652f, + 0.26875457f, 0.47031689f, -0.03951600f, 0.47674999f, 0.26851410f, 0.46987134f, -0.03919901f, 0.47629333f, + 0.18083976f, 0.16579385f, -0.05161894f, 0.63075018f, 0.18228555f, 0.16642828f, -0.04873618f, 0.63316816f, + 0.18362364f, 0.16702136f, -0.05045432f, 0.63178891f, 0.18000112f, 0.16541445f, -0.05139139f, 0.63105792f, }; std::vector present = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, - 0.73566031, 0.75168055, -0.59527576, -0.23625080, -0.58657664, -0.29827437, -0.59528387, -0.33578828, - -0.59068960, -0.34870598, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, - 0.88056499, 0.08436447, 0.54744655, 0.25466520, 0.08536442, -0.06134639, 0.11295843, -0.04818217, - 0.14562836, -0.12305059, 0.15695867, -0.11161390, 0.08500137, 0.19271941, 0.86525357, 0.21717627, - 0.97158766, 0.42288730, 0.09890039, 0.01148765, 0.97024685, 0.19697112, -0.59141791, -0.31600696, - -0.58647990, -0.34302223, -0.59306550, -0.36427227, -0.59695083, -0.26431620, 0.67671591, 0.67960924, - 0.46656516, 0.30850092, 0.73536104, 0.73938161, 0.91650903, 0.57628596, 0.51164514, 0.11695814, - 0.11255538, -0.07302766, 0.16620418, -0.09871224, 0.15272795, -0.12076923, 0.08827571, -0.07442430, - 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, 0.90233624, 0.84986305, 0.26141909, - 0.84528726, 0.81416380, 0.07014155, 0.07749540, 0.08745074, 0.13131952, 0.08430066, 0.09709007, - 0.09247591, 0.11065811, 0.00429944, 0.31476986, 0.00440918, 0.77413058, 0.13409913, 0.20965169, - 0.61764991, 0.55266041, 0.56107825, 0.42051074, -0.18658412, 0.53568852, -0.19482780, 0.53271860, - -0.19558203, 0.57155901, -0.19633618, 0.57260245, 0.16804738, 0.80362344, 0.52392679, 0.27550557, - 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.08172131, 0.13249113, - 0.09947956, 0.10781212, 0.08890627, 0.12280971, 0.06911418, 0.09499176, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, - -0.19839945, 0.53462923, -0.19349247, 0.57778782, -0.20039621, 0.56689924, -0.19190890, 0.53286803, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, -0.59527576f, -0.23625080f, -0.58657664f, -0.29827437f, -0.59528387f, -0.33578828f, + -0.59068960f, 
-0.34870598f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08536442f, -0.06134639f, 0.11295843f, -0.04818217f, + 0.14562836f, -0.12305059f, 0.15695867f, -0.11161390f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, -0.59141791f, -0.31600696f, + -0.58647990f, -0.34302223f, -0.59306550f, -0.36427227f, -0.59695083f, -0.26431620f, 0.67671591f, 0.67960924f, + 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.11255538f, -0.07302766f, 0.16620418f, -0.09871224f, 0.15272795f, -0.12076923f, 0.08827571f, -0.07442430f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.07014155f, 0.07749540f, 0.08745074f, 0.13131952f, 0.08430066f, 0.09709007f, + 0.09247591f, 0.11065811f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, -0.18658412f, 0.53568852f, -0.19482780f, 0.53271860f, + -0.19558203f, 0.57155901f, -0.19633618f, 0.57260245f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.08172131f, 0.13249113f, + 0.09947956f, 0.10781212f, 0.08890627f, 0.12280971f, 0.06911418f, 0.09499176f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, + -0.19839945f, 0.53462923f, -0.19349247f, 0.57778782f, -0.20039621f, 0.56689924f, -0.19190890f, 0.53286803f, }; test_case.add_input(input); @@ -933,7 +947,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { test_case.add_input(past); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { @@ -944,9 +958,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.14930259, 0.11199699, 0.81292826, 0.08368169, 0.05704883, 0.41276145, 0.38760167, 0.00146112, - 0.14275745, 0.54254925, 0.07962929, 0.31023681, 0.09597706, 0.60583973, 0.90233743, 0.33360451, - 0.18193199, 0.19159532, 0.07869831, 0.86026299, 0.20683478, 0.40150928, 0.93124926, 0.31805834, + 0.14930259f, 0.11199699f, 0.81292826f, 0.08368169f, 0.05704883f, 0.41276145f, 0.38760167f, 0.00146112f, + 0.14275745f, 0.54254925f, 0.07962929f, 0.31023681f, 0.09597706f, 0.60583973f, 0.90233743f, 0.33360451f, + 0.18193199f, 0.19159532f, 0.07869831f, 0.86026299f, 0.20683478f, 0.40150928f, 0.93124926f, 0.31805834f, }; std::vector mask = { 0, @@ -959,30 +973,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { 0, }; std::vector extra_add = { - 0.73230380, 0.61824518, 0.19738488, 0.57034588, 0.22331032, 0.53262889, 0.60098642, 0.72943515, - 0.09009175, 0.81116527, 0.47240964, 0.49679127, 0.41110733, 0.29418564, 0.93818313, 0.64175284, - 0.06807775, 0.66733366, 0.78848422, 0.48788327, 0.38806340, 0.14002480, 0.72263688, 0.22772972, - 0.24000823, 0.75820386, 0.64254439, 0.19385594, 0.95595860, 0.59840417, 0.93769604, 0.62474734, - 0.36690548, 0.76047903, 0.62352085, 0.58574778, 0.64251810, 0.78072041, 0.43344691, 0.75383639, - 0.73950553, 0.92625278, 0.05066428, 0.08448382, 0.25980917, 0.50312829, 
0.97800279, 0.05422170, - 0.05171391, 0.82828254, 0.42234898, 0.95752198, 0.96325767, 0.97909677, 0.35578200, 0.48091716, - 0.03637243, 0.91552693, 0.43403026, 0.94275808, 0.51182085, 0.86773109, 0.38459453, 0.87822068, + 0.73230380f, 0.61824518f, 0.19738488f, 0.57034588f, 0.22331032f, 0.53262889f, 0.60098642f, 0.72943515f, + 0.09009175f, 0.81116527f, 0.47240964f, 0.49679127f, 0.41110733f, 0.29418564f, 0.93818313f, 0.64175284f, + 0.06807775f, 0.66733366f, 0.78848422f, 0.48788327f, 0.38806340f, 0.14002480f, 0.72263688f, 0.22772972f, + 0.24000823f, 0.75820386f, 0.64254439f, 0.19385594f, 0.95595860f, 0.59840417f, 0.93769604f, 0.62474734f, + 0.36690548f, 0.76047903f, 0.62352085f, 0.58574778f, 0.64251810f, 0.78072041f, 0.43344691f, 0.75383639f, + 0.73950553f, 0.92625278f, 0.05066428f, 0.08448382f, 0.25980917f, 0.50312829f, 0.97800279f, 0.05422170f, + 0.05171391f, 0.82828254f, 0.42234898f, 0.95752198f, 0.96325767f, 0.97909677f, 0.35578200f, 0.48091716f, + 0.03637243f, 0.91552693f, 0.43403026f, 0.94275808f, 0.51182085f, 0.86773109f, 0.38459453f, 0.87822068f, }; std::vector output = { - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.08714182, 0.12259886, -0.19516067, 0.54010558, 0.08671370, 0.12369543, -0.19658084, 0.54502594, - 0.08458151, 0.12488046, -0.19519810, 0.53906947, 0.09063499, 0.12088943, -0.19583938, 0.54266596, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.08714182f, 0.12259886f, -0.19516067f, 0.54010558f, 0.08671370f, 0.12369543f, -0.19658084f, 0.54502594f, + 0.08458151f, 0.12488046f, -0.19519810f, 0.53906947f, 0.09063499f, 0.12088943f, -0.19583938f, 0.54266596f, }; std::vector present = { - -0.59800303, -0.35666457, -0.59420627, -0.31881350, -0.59887993, -0.27025288, -0.60216135, -0.27772796, - 0.11659990, -0.11224300, 0.09693416, -0.07304113, 0.06023501, -0.05941332, 0.06434284, -0.07978789, - -0.59005713, -0.37009716, -0.59542215, -0.27914333, -0.57998544, -0.29826957, -0.58625919, -0.28872511, - 0.15994480, -0.11288825, 0.07906821, -0.05991337, 0.14479136, -0.04415035, 0.13493451, -0.06541853, - 0.07513385, 0.14411135, 0.07505661, 0.14532046, 0.06090815, 0.12919067, 0.05788904, 0.12018456, - -0.20586906, 0.53715372, -0.20203318, 0.52092510, -0.19883196, 0.50295448, -0.19937295, 0.51055026, - 0.09417956, 0.12943678, 0.06923291, 0.12574309, 0.10221909, 0.11366953, 0.09235901, 0.09584601, - -0.20036517, 0.56818324, -0.19709785, 0.51547027, -0.18871340, 0.55736589, -0.18826833, 0.55965197, + -0.59800303f, -0.35666457f, -0.59420627f, -0.31881350f, -0.59887993f, -0.27025288f, -0.60216135f, -0.27772796f, + 0.11659990f, -0.11224300f, 0.09693416f, -0.07304113f, 0.06023501f, -0.05941332f, 0.06434284f, -0.07978789f, + -0.59005713f, -0.37009716f, -0.59542215f, -0.27914333f, -0.57998544f, -0.29826957f, -0.58625919f, -0.28872511f, + 0.15994480f, -0.11288825f, 0.07906821f, -0.05991337f, 0.14479136f, -0.04415035f, 0.13493451f, -0.06541853f, + 0.07513385f, 0.14411135f, 0.07505661f, 0.14532046f, 0.06090815f, 0.12919067f, 0.05788904f, 0.12018456f, + -0.20586906f, 0.53715372f, -0.20203318f, 0.52092510f, -0.19883196f, 0.50295448f, -0.19937295f, 0.51055026f, + 0.09417956f, 0.12943678f, 0.06923291f, 0.12574309f, 0.10221909f, 0.11366953f, 0.09235901f, 0.09584601f, + -0.20036517f, 
0.56818324f, -0.19709785f, 0.51547027f, -0.18871340f, 0.55736589f, -0.18826833f, 0.55965197f, }; test_case.add_input(input); @@ -990,7 +1004,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { test_case.add_input(extra_add); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { @@ -1001,29 +1015,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.42226878, 0.50984067, 0.80440795, 0.68040705, 0.93614250, 0.45104721, 0.71767306, 0.48596525, - 0.70076728, 0.04500086, 0.28930107, 0.77435863, 0.19392140, 0.90290719, 0.91955870, 0.58811885, - 0.76795286, 0.62884814, 0.23377730, 0.49212688, 0.87256873, 0.11944817, 0.57715887, 0.91886938, + 0.42226878f, 0.50984067f, 0.80440795f, 0.68040705f, 0.93614250f, 0.45104721f, 0.71767306f, 0.48596525f, + 0.70076728f, 0.04500086f, 0.28930107f, 0.77435863f, 0.19392140f, 0.90290719f, 0.91955870f, 0.58811885f, + 0.76795286f, 0.62884814f, 0.23377730f, 0.49212688f, 0.87256873f, 0.11944817f, 0.57715887f, 0.91886938f, }; std::vector weights = { - 0.99377930, 0.22733542, 0.43217131, 0.60717988, 0.97224706, 0.70020503, 0.92439449, 0.41512674, 0.47728160, - 0.40306625, 0.72619593, 0.37954643, 0.36950976, 0.84305370, 0.61671126, 0.22251014, 0.73839295, 0.73471880, - 0.37428924, 0.80240524, 0.23120961, 0.06072779, 0.92840081, 0.71558088, 0.08719950, 0.51666921, 0.53768843, - 0.48113129, 0.46389169, 0.01036468, 0.37341005, 0.67195475, 0.53599644, 0.41795707, 0.58081782, 0.97939289, + 0.99377930f, 0.22733542f, 0.43217131f, 0.60717988f, 0.97224706f, 0.70020503f, 0.92439449f, 0.41512674f, + 0.47728160f, 0.40306625f, 0.72619593f, 0.37954643f, 0.36950976f, 0.84305370f, 0.61671126f, 0.22251014f, + 0.73839295f, 0.73471880f, 0.37428924f, 0.80240524f, 0.23120961f, 0.06072779f, 0.92840081f, 0.71558088f, + 0.08719950f, 0.51666921f, 0.53768843f, 0.48113129f, 0.46389169f, 0.01036468f, 0.37341005f, 0.67195475f, + 0.53599644f, 0.41795707f, 0.58081782f, 0.97939289f, }; std::vector bias = { - 0.77122736, - 0.75600564, - 0.86177206, - 0.69982684, - 0.74719858, - 0.78054035, - 0.80007398, - 0.74902135, - 0.81258053, - 0.01575289, - 0.08463049, - 0.39671996, + 0.77122736f, + 0.75600564f, + 0.86177206f, + 0.69982684f, + 0.74719858f, + 0.78054035f, + 0.80007398f, + 0.74902135f, + 0.81258053f, + 0.01575289f, + 0.08463049f, + 0.39671996f, }; std::vector mask = { 0, @@ -1046,39 +1061,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { 0, }; std::vector past = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, - 0.89891797, 0.27753425, 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, - 0.62769043, 0.61990744, 0.59077013, 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, - 0.80831683, 0.29390740, 0.29051417, 0.51964313, 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, - 0.98571628, 0.07566493, 0.37537411, 0.42080343, 0.21715857, 0.57869565, 0.55962265, 0.82500041, 0.60776925, - 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, 0.98562658, 0.64355153, 0.69856495, 0.30377558, - 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 
0.84963584, 0.94460547, 0.90907097, 0.22525074, - 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, 0.22606593, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, }; std::vector output = { - 1.47439122, 0.50951630, 1.17974961, 1.58501005, 1.49403512, 0.51560062, 1.18972027, 1.59668207, - 1.48384988, 0.51248586, 1.18596375, 1.59219086, 1.44181466, 0.50219649, 1.15537691, 1.55348074, - 0.83429223, 0.59521818, 0.87688094, 0.13611843, 0.82936716, 0.61004817, 0.87633312, 0.13887596, - 0.83155584, 0.59382534, 0.87496555, 0.14041223, 0.83309680, 0.58982348, 0.87517864, 0.13930768, + 1.47439122f, 0.50951630f, 1.17974961f, 1.58501005f, 1.49403512f, 0.51560062f, 1.18972027f, 1.59668207f, + 1.48384988f, 0.51248586f, 1.18596375f, 1.59219086f, 1.44181466f, 0.50219649f, 1.15537691f, 1.55348074f, + 0.83429223f, 0.59521818f, 0.87688094f, 0.13611843f, 0.82936716f, 0.61004817f, 0.87633312f, 0.13887596f, + 0.83155584f, 0.59382534f, 0.87496555f, 0.14041223f, 0.83309680f, 0.58982348f, 0.87517864f, 0.13930768f, }; std::vector present = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 1.90736914, 1.45914197, 2.30920029, 1.94944119, 2.12886763, 1.64736962, 1.36378694, 1.03263116, - 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, 0.89891797, - 0.27753425, 1.68161881, 1.87394094, 1.94785213, 2.08572555, 1.90705216, 1.90777159, 1.23910809, 1.52017307, - 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, 0.62769043, 0.61990744, - 0.59077013, 2.02901411, 1.58923888, 2.17776394, 1.76309133, 1.74264824, 1.31485105, 1.71575761, 1.29775190, - 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, 0.80831683, 0.29390740, 0.29051417, - 0.51964313, 1.66065478, 2.17192268, 1.86598253, 2.03193212, 1.52620018, 1.82728052, 1.46963060, 1.87916136, - 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, 0.98571628, 0.07566493, 0.37537411, 0.42080343, - 0.21715857, 1.56316149, 0.55312467, 1.59553123, 0.53537023, 1.64308119, 0.62742490, 1.31600118, 0.37510848, - 0.57869565, 0.55962265, 0.82500041, 0.60776925, 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, - 0.98562658, 1.33183134, 1.70965421, 1.70983100, 1.76660407, 1.46399045, 1.70318413, 0.83565855, 1.37921953, - 0.64355153, 0.69856495, 0.30377558, 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, - 0.94460547, 1.60677671, 0.53308368, 1.60789728, 
0.56227136, 1.50563633, 0.50456268, 1.49554634, 0.48299593, - 0.90907097, 0.22525074, 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, - 0.22606593, 1.59781134, 2.01703453, 1.58993423, 1.78536010, 1.21809304, 1.69219351, 1.24090374, 1.75499403, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 1.90736914f, 1.45914197f, 2.30920029f, 1.94944119f, 2.12886763f, 1.64736962f, + 1.36378694f, 1.03263116f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 1.68161881f, 1.87394094f, 1.94785213f, 2.08572555f, + 1.90705216f, 1.90777159f, 1.23910809f, 1.52017307f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 2.02901411f, 1.58923888f, + 2.17776394f, 1.76309133f, 1.74264824f, 1.31485105f, 1.71575761f, 1.29775190f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 1.66065478f, 2.17192268f, 1.86598253f, 2.03193212f, 1.52620018f, 1.82728052f, 1.46963060f, 1.87916136f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 1.56316149f, 0.55312467f, 1.59553123f, 0.53537023f, 1.64308119f, 0.62742490f, + 1.31600118f, 0.37510848f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 1.33183134f, 1.70965421f, 1.70983100f, 1.76660407f, + 1.46399045f, 1.70318413f, 0.83565855f, 1.37921953f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 1.60677671f, 0.53308368f, + 1.60789728f, 0.56227136f, 1.50563633f, 0.50456268f, 1.49554634f, 0.48299593f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, + 1.59781134f, 2.01703453f, 1.58993423f, 1.78536010f, 1.21809304f, 1.69219351f, 1.24090374f, 1.75499403f, }; test_case.add_input(Shape{2, 4, 3}, input); @@ -1088,7 +1106,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { test_case.add_input(Shape{2, 2, 2, 5, 2}, past); test_case.add_expected_output(Shape{2, 4, 4}, output); test_case.add_expected_output(Shape{2, 2, 2, 9, 2}, present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { @@ -1098,67 +1116,68 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { auto test_case = test::TestCase(function, s_device); std::vector inputA = { - 0.760289272, - 0.155913759, - 0.781790674, - -0.916164881, - -0.599392663, - 0.264654594, - 0.793851873, - 0.177088557, - 0.082737454, - 0.070692121, - -0.811413035, - -0.098108588, - 0.650090827, - -0.987659751, - -0.815909968, - -0.375566031, - -0.192777789, - -0.843511765, + 0.760289272f, + 0.155913759f, + 0.781790674f, + -0.916164881f, + -0.599392663f, + 0.264654594f, + 0.793851873f, + 0.177088557f, + 0.082737454f, + 0.070692121f, + -0.811413035f, + -0.098108588f, + 0.650090827f, + -0.987659751f, + -0.815909968f, + -0.375566031f, + -0.192777789f, + -0.843511765f, }; std::vector inputB = { - -0.599338344, -0.893724541, -0.362130441, -0.510642812, -0.943908814, -0.247790266, -0.732624930, 0.660286910, - -0.264866660, -0.907203793, 0.339617010, 
-0.322529173, 0.714601048, 0.581729832, -0.609115490, -0.369882312, - -0.462432785, -0.554824440, -0.833489997, -0.899945507, -0.088337136, -0.253637339, -0.443307744, -0.677004897, + -0.599338344f, -0.893724541f, -0.362130441f, -0.510642812f, -0.943908814f, -0.247790266f, + -0.732624930f, 0.660286910f, -0.264866660f, -0.907203793f, 0.339617010f, -0.322529173f, + 0.714601048f, 0.581729832f, -0.609115490f, -0.369882312f, -0.462432785f, -0.554824440f, + -0.833489997f, -0.899945507f, -0.088337136f, -0.253637339f, -0.443307744f, -0.677004897f, }; std::vector inputC = { - -0.540039918, - -0.235745675, - -0.337291175, - -0.702340580, - 0.532629731, - -0.794515569, - -0.532012999, - 0.372558416, - 0.582367524, - -0.483044018, - 0.656635884, - -0.655929499, + -0.540039918f, + -0.235745675f, + -0.337291175f, + -0.702340580f, + 0.532629731f, + -0.794515569f, + -0.532012999f, + 0.372558416f, + 0.582367524f, + -0.483044018f, + 0.656635884f, + -0.655929499f, }; std::vector output = { - -8.75421E-05, - -9.65321E-05, - 0.239491309, - -2.70329E-05, - 0.151090653, - -5.53371E-05, - -1.22197E-05, - 0.413963711, - 0.618195780, - 0.011654445, - 0.815541101, - -2.46706E-05, + -8.75421E-05f, + -9.65321E-05f, + 0.239491309f, + -2.70329E-05f, + 0.151090653f, + -5.53371E-05f, + -1.22197E-05f, + 0.413963711f, + 0.618195780f, + 0.011654445f, + 0.815541101f, + -2.46706E-05f, }; test_case.add_input(Shape{3, 6}, inputA); test_case.add_input(Shape{6, 4}, inputB); test_case.add_input(Shape{3, 4}, inputC); test_case.add_expected_output(Shape{3, 4}, output); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_com_microsoft_fused_conv_hard_sigmoid) { diff --git a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp index 1c9a1054bf674d..a6c151d0ee526d 100644 --- a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp @@ -56,7 +56,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_scatter_elements) { SERIALIZED_ZOO, "onnx/scatter_elements_opset11.onnx")); - test_constant_folding(fn, {1.0, 1.1, 3.0, 2.1, 5.0}, Shape{1, 5}); + test_constant_folding(fn, {1.0f, 1.1f, 3.0f, 2.1f, 5.0f}, Shape{1, 5}); } NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_non_zero_scalar) { diff --git a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp index 0c1d0648d8abee..3cf38e223d07ed 100644 --- a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp @@ -537,7 +537,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_same_inputs) { auto test_case = test::TestCase(function, s_device); std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); // condition test_case.add_input({true}); @@ -577,7 +577,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_different_inputs) { auto test_case = test::TestCase(function, s_device); std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); // condition test_case.add_input({true}); @@ -649,7 +649,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) { // expected value == x * y std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); std::vector expected; std::transform(x.begin(), 
x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float { return i * j; @@ -663,7 +663,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) { // case when condition == true and all(x < y) // expected value == x + y std::iota(x.begin(), x.end(), -static_cast(x.size())); - std::iota(y.begin(), y.end(), 1); + std::iota(y.begin(), y.end(), 1.f); std::transform(x.begin(), x.end(), y.begin(), expected.begin(), [](float i, float j) -> float { return i + j; }); @@ -703,13 +703,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_multiple_outputs) { // case when condition == true so split is along axis 0 std::vector x(36); - std::iota(x.begin(), x.end(), 0); + std::iota(x.begin(), x.end(), 0.f); std::vector expected1(12); - std::iota(expected1.begin(), expected1.end(), 0); + std::iota(expected1.begin(), expected1.end(), 0.f); std::vector expected2(12); - std::iota(expected2.begin(), expected2.end(), 12); + std::iota(expected2.begin(), expected2.end(), 12.f); std::vector expected3(12); - std::iota(expected3.begin(), expected3.end(), 24); + std::iota(expected3.begin(), expected3.end(), 24.f); test_case.add_input({true}); // condition test_case.add_input(x); test_case.add_expected_output(expected1); @@ -768,9 +768,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_with_only_indentity_in_else_branch) { auto test_case = test::TestCase(function, s_device); std::vector x(shape_size(Shape{1, 5, 2, 2})); - std::iota(x.begin(), x.end(), 0); - std::vector expected{1.333333, 3, 4.666666, 6.333333, 8, 10, 12, 14, 16, 18, - 20, 22, 24, 26, 28, 30, 25.33333, 27, 28.666667, 30.33333}; + std::iota(x.begin(), x.end(), 0.f); + std::vector expected{1.333333f, 3.f, 4.666666f, 6.333333f, 8.f, 10.f, 12.f, + 14.f, 16.f, 18.f, 20.f, 22.f, 24.f, 26.f, + 28.f, 30.f, 25.33333f, 27.f, 28.666667f, 30.33333f}; test_case.add_input(x); test_case.add_expected_output(expected); test_case.run(); @@ -820,7 +821,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_dynamic_inputs) { auto test_case = test::TestCase(function, s_device); std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); std::vector expected; std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float { return i + j; diff --git a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp index 2afbdef3999159..dfa888bb943ce9 100644 --- a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp @@ -220,7 +220,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_asinh_3_2) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{3, 2}, {-1.5f, 0.0f, 1.5f, -1.5f, 0.0f, 1.5f}); - test_case.add_expected_output(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632, 0.0f, 1.1947632f}); + test_case.add_expected_output(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632f, 0.0f, 1.1947632f}); test_case.run(); } @@ -713,7 +713,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) { const auto elems_in_tensor = shape_size(shape); std::vector input_values(elems_in_tensor); - std::iota(std::begin(input_values), std::end(input_values), 1); + std::iota(std::begin(input_values), std::end(input_values), 1.f); test_case.add_input(shape, input_values); @@ -729,9 +729,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) { namespace { Shape get_flattened_shape(const Shape& in_shape, size_t axis) { - size_t 
first_dim_size = - std::accumulate(begin(in_shape), next(begin(in_shape), axis), 1UL, std::multiplies()); - size_t last_dim_size = std::accumulate(next(begin(in_shape), axis), end(in_shape), 1UL, std::multiplies()); + size_t first_dim_size = std::accumulate(begin(in_shape), + next(begin(in_shape), axis), + static_cast(1), + std::multiplies()); + size_t last_dim_size = + std::accumulate(next(begin(in_shape), axis), end(in_shape), static_cast(1), std::multiplies()); return Shape{first_dim_size, last_dim_size}; } } // namespace @@ -751,7 +754,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis_0) { const auto elems_in_tensor = shape_size(shape); std::vector input_values(elems_in_tensor); - std::iota(input_values.begin(), input_values.end(), 1); + std::iota(input_values.begin(), input_values.end(), 1.f); test_case.add_input(shape, input_values); @@ -778,7 +781,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis) { const auto elems_in_tensor = shape_size(shape); std::vector input_values(elems_in_tensor); - std::iota(input_values.begin(), input_values.end(), 1); + std::iota(input_values.begin(), input_values.end(), 1.f); test_case.add_input(shape, input_values); @@ -805,7 +808,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_neg_axis) { const auto elems_in_tensor = shape_size(shape); std::vector input_values(elems_in_tensor); - std::iota(input_values.begin(), input_values.end(), 1); + std::iota(input_values.begin(), input_values.end(), 1.f); test_case.add_input(shape, input_values); @@ -907,7 +910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input) { const Shape input_shape{3, 4, 1}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 0}); test_case.add_input({2, 3}); @@ -926,7 +929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_neg_axes) { const Shape input_shape{3, 4, 1}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 0}); test_case.add_input({2, 3}); @@ -945,7 +948,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_12_axes) { const Shape input_shape{4, 3, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 0}); test_case.add_input({2, 1}); @@ -963,7 +966,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_20_axes) { const Shape input_shape{4, 3, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_shape, input_values); test_case.add_input({0, 1}); test_case.add_input({1, 3}); @@ -982,7 +985,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes) { const Shape input_shape{2, 2, 2, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 0}); test_case.add_input({1, 1}); @@ -1000,7 +1003,7 @@ NGRAPH_TEST(${BACKEND_NAME}, 
onnx_dyn_shapes_slice_10_4d_input_0231_axes_ends_ma const Shape input_shape{2, 2, 2, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 1, 1, 0}); test_case.add_input({std::numeric_limits::max(), @@ -1021,7 +1024,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_2103_axes_ends_ma const Shape input_shape{2, 2, 2, 5}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({1, 0, 0, 1}); test_case.add_input({2, @@ -1043,7 +1046,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes_21_steps) const Shape input_shape{2, 2, 6, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({0, 1}); test_case.add_input({5, 2}); @@ -1060,7 +1063,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_default_axes) { const Shape input_shape{4, 3, 2}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_values); test_case.add_input({1, 1, 1}); test_case.add_input({2, 2, 2}); @@ -1116,34 +1119,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_softmax_axis_2) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_axis_2.onnx")); const std::vector input = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, 0.21004745, 1.38337255, - 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, - -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, 0.21004745f, 1.38337255f, + 1.19030397f, 2.0940445f, -0.03551657f, -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, + -0.56797112f, -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, - 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 0.23727845, - -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, -0.13259761f, -1.14313018f, + 0.2673723f, -0.87996154f, 1.29053106f, 1.55f, 0.8396538f, 1.20729817f, 0.23727845f, + -0.89113606f, -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, - -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, - -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, 0.75425957f, -2.43721014f, + -1.24478184f, 2.65316853f, 1.19509542f, -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, + -0.4589638f, -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 
0.07012682, 0.22670066, - 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265, - 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552, + {0.80619486f, 0.03075257f, 0.1161086f, 0.027393f, 0.01955098f, 0.07012682f, 0.22670066f, + 0.18689779f, 0.4614171f, 0.05485763f, 0.04486172f, 0.72286838f, 0.10286818f, 0.07356265f, + 0.05583908f, 0.01280724f, 0.02448298f, 0.08096658f, 0.11509768f, 0.76664552f, - 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969, - 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588, - 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432, + 0.30399806f, 0.1076406f, 0.03371745f, 0.0950595f, 0.4595844f, 0.13369873f, 0.04866969f, + 0.19944906f, 0.06332151f, 0.55486101f, 0.39101105f, 0.19217177f, 0.27755913f, 0.10521588f, + 0.03404216f, 0.01150354f, 0.08279411f, 0.03137732f, 0.68902071f, 0.18530432f, - 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928, - 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439, - 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905}); + 0.0402528f, 0.31156222f, 0.23747503f, 0.1543129f, 0.25639705f, 0.10627912f, 0.00436928f, + 0.01439711f, 0.70979614f, 0.16515835f, 0.06798343f, 0.2957175f, 0.17468555f, 0.34994439f, + 0.11166912f, 0.03615172f, 0.07108136f, 0.08527994f, 0.44775794f, 0.35972905f}); test_case.run(3); } @@ -1184,7 +1187,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization_dyn_shape) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1207,7 +1210,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization_dyn_shape2) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1275,7 +1278,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_1_3d_input_21_axes_ends_max) const Shape input_shape{1, 2, 3, 4}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_shape, input_values); test_case.add_expected_output(Shape{1, 1, 3, 3}, {13, 14, 15, 17, 18, 19, 21, 22, 23}); test_case.run(); @@ -1315,7 +1318,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_max_pool_dyn_rank_without_default_attrs) Shape input_shape{1, 1, 4, 4}; std::vector input(shape_size(input_shape)); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); test_case.add_input(input_shape, input); test_case.add_expected_output(Shape{1, 1, 3, 3}, {5, 6, 7, 9, 10, 11, 13, 14, 15}); test_case.run(); @@ -1327,7 +1330,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_dynamic_input) { "onnx/dynamic_shapes/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -1345,7 +1348,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth_dynamic_input) { "onnx/dynamic_shapes/space_to_depth.onnx")); 
std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp index fe85ef213db8ae..909eb4bb7bf2fb 100644 --- a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp @@ -57,14 +57,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_prior_box) { std::vector A(3 * 2 * 2); std::vector B(3 * 6 * 6); std::vector output = { - -2.3200002, -2.3200002, 3.6533334, 3.6533334, -3.7053659, -3.7053659, 5.0386992, 5.0386992, - -0.98666668, -2.3200002, 4.9866667, 3.6533334, -2.3720326, -3.7053659, 6.3720322, 5.0386992, - -2.3200002, -0.98666668, 3.6533334, 4.9866667, -3.7053659, -2.3720326, 5.0386992, 6.3720322, - -0.98666668, -0.98666668, 4.9866667, 4.9866667, -2.3720326, -2.3720326, 6.3720322, 6.3720322, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + -2.3200002f, -2.3200002f, 3.6533334f, 3.6533334f, -3.7053659f, -3.7053659f, 5.0386992f, 5.0386992f, + -0.98666668f, -2.3200002f, 4.9866667f, 3.6533334f, -2.3720326f, -3.7053659f, 6.3720322f, 5.0386992f, + -2.3200002f, -0.98666668f, 3.6533334f, 4.9866667f, -3.7053659f, -2.3720326f, 5.0386992f, 6.3720322f, + -0.98666668f, -0.98666668f, 4.9866667f, 4.9866667f, -2.3720326f, -2.3720326f, 6.3720322f, 6.3720322f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); test_case.add_input(B); @@ -78,11 +78,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered) { "onnx/priorbox_clustered.onnx")); auto test_case = test::TestCase(function, s_device); - std::vector A{15.0}; - std::vector B{10.0}; + std::vector A{15.0f}; + std::vector B{10.0f}; std::vector output = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); test_case.add_input(B); @@ -101,22 +101,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered_most_attrs_default) { std::iota(std::begin(A), std::end(A), 0.0f); std::vector B(1 * 1 * 3 * 3); std::iota(std::begin(B), std::end(B), 0.0f); - std::vector output = {-0.1666666716337203979, - -0.1666666716337203979, - 0.1666666716337203979, - 0.1666666716337203979, - -0.1666666716337203979, - 0.3333333432674407959, - 0.1666666716337203979, - 0.6666666865348815918, - 0.1, - 0.1, - 0.2, - 0.2, - 0.1, - 0.1, - 0.2, - 0.2}; + std::vector output = {-0.1666666716337203979f, + -0.1666666716337203979f, + 0.1666666716337203979f, + 0.1666666716337203979f, + -0.1666666716337203979f, + 0.3333333432674407959f, + 0.1666666716337203979f, + 0.6666666865348815918f, + 0.1f, + 0.1f, + 0.2f, + 0.2f, + 0.1f, + 0.1f, + 0.2f, + 0.2f}; test_case.add_input(A); test_case.add_input(B); test_case.add_expected_output(Shape{1, 2, 8}, output); @@ -170,11 +170,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_detection_output) { 
std::vector logits = gen_vector(12, -2, 2); std::vector class_preds = gen_vector(9, 0, 1); std::vector proposals = gen_vector(12 * 2, 0, 1); - std::vector output = {0, 1, 0.777778, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 1, 0.444444, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.888889, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 2, 0.555556, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.222222, -0.0608094, -0.0142007, -0.0225239, 0.0304044}; + std::vector output = {0, 1, 0.777778f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 1, 0.444444f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.888889f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 2, 0.555556f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.222222f, -0.0608094f, -0.0142007f, -0.0225239f, 0.0304044f}; test_case.add_input(logits); test_case.add_input(class_preds); test_case.add_input(proposals); @@ -188,18 +188,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -212,15 +212,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm_5d) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm_5d.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 1, 2, 1}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); - std::vector output = { - -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, 1.65836453437, 5.78884744644, - 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, -2.39144825935, 3.86951708793, - 11.57769489288, 
18.73308372497, -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, - 1.65836453437, 5.78884744644, 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, - -2.39144825935, 3.86951708793, 11.57769489288, 18.73308372497}; + std::iota(data.begin(), data.end(), 0.f); + std::vector output = {-0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, -1.02490639686f, + 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, 2.76394081115f, + 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, 11.57769489288f, + 18.73308372497f, -0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, + -1.02490639686f, 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, + 2.76394081115f, 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, + 11.57769489288f, 18.73308372497f}; test_case.add_input(data); test_case.add_expected_output(shape, output); @@ -232,20 +233,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_normalize) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/normalize.onnx")); auto test_case = test::TestCase(function, s_device); std::vector data(12); - std::iota(data.begin(), data.end(), 1); + std::iota(data.begin(), data.end(), 1.f); std::vector output = { - 0.19334731, - 0.33806169, - 0.44846106, - 0.53452247, - 1.4501048, - 1.5212777, - 1.5696137, - 1.6035674, - 3.4802516, - 3.3806169, - 3.2887144, - 3.2071347, + 0.19334731f, + 0.33806169f, + 0.44846106f, + 0.53452247f, + 1.4501048f, + 1.5212777f, + 1.5696137f, + 1.6035674f, + 3.4802516f, + 3.3806169f, + 3.2887144f, + 3.2071347f, }; test_case.add_input(data); test_case.add_expected_output(Shape{1, 3, 2, 2}, output); @@ -260,7 +261,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_with_beta) { auto test_case = test::TestCase(function, s_device); std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.2036667, 0.0, 0.2963333}); + test_case.add_expected_output(expected_output_shape, {-0.2036667f, 0.0f, 0.2963333f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -274,7 +275,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_without_beta) { auto test_case = test::TestCase(function, s_device); std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.18877034, 0.0, 0.31122968}); + test_case.add_expected_output(expected_output_shape, {-0.18877034f, 0.0f, 0.31122968f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -313,9 +314,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_detection_output) test_case.add_expected_output(Shape{5, 4}, { 0.8929862f, - 0.892986297607421875, - 12.10701370239257812, - 12.10701370239257812, + 0.892986297607421875f, + 12.10701370239257812f, + 12.10701370239257812f, 0, 0.0f, 0.0f, @@ -440,18 +441,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_group_norm) { auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 
15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -468,13 +469,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_prior_grid_genera auto test_case = test::TestCase(function, s_device); std::vector priors(shape_size(Shape{3, 4})); - std::iota(priors.begin(), priors.end(), 0); + std::iota(priors.begin(), priors.end(), 0.f); std::vector feature_map(shape_size(Shape{1, 1, 1, 3})); - std::iota(feature_map.begin(), feature_map.end(), 0); + std::iota(feature_map.begin(), feature_map.end(), 0.f); std::vector im_data(shape_size(Shape{1, 3, 4, 7})); - std::iota(im_data.begin(), im_data.end(), 0); + std::iota(im_data.begin(), im_data.end(), 0.f); test_case.add_input(priors); test_case.add_input(feature_map); @@ -495,51 +496,51 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_roi_feature_extra auto test_case = test::TestCase(function, s_device); std::vector rois(shape_size(Shape{2, 4})); - std::iota(rois.begin(), rois.end(), 0); + std::iota(rois.begin(), rois.end(), 0.f); std::vector pyramid_layer_0(shape_size(Shape{1, 2, 2, 3})); - std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0); + std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0.f); test_case.add_input(rois); test_case.add_input(pyramid_layer_0); test_case.add_expected_output(Shape{2, 2, 3, 3}, - {1.416666746139526367, - 1.750000119209289551, - 2.083333492279052734, - 2.416666746139526367, - 2.75, - 3.083333492279052734, - 3.166666507720947266, - 3.5, - 3.833333492279052734, - 7.416666507720947266, - 7.75, - 8.083333015441894531, - 8.416666984558105469, - 8.75, - 9.083333969116210938, - 9.166666030883789062, - 9.5, - 9.833333969116210938, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 2.083333492279052734, - 2.25, - 2.416666746139526367, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 5.083333015441894531, - 5.25, - 5.416666507720947266}); + {1.416666746139526367f, + 1.750000119209289551f, + 2.083333492279052734f, + 2.416666746139526367f, + 2.75f, + 3.083333492279052734f, + 3.166666507720947266f, + 3.5f, + 3.833333492279052734f, + 7.416666507720947266f, + 7.75f, + 8.083333015441894531f, + 8.416666984558105469f, + 8.75f, + 9.083333969116210938f, + 
9.166666030883789062f, + 9.5f, + 9.833333969116210938f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 2.083333492279052734f, + 2.25f, + 2.416666746139526367f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 5.083333015441894531f, + 5.25f, + 5.416666507720947266f}); test_case.add_expected_output(Shape{2, 4}, {0, 1, 2, 3, 4, 5, 6, 7}); test_case.run(); @@ -593,29 +594,32 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { // scores test_case.add_input( Shape{1, 3, 2, 6}, - {0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, 0.14516132, 0.08081771, 0.1799732, - 0.9229515, 0.42420176, 0.50857586, 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, - 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, 0.5256446, 0.03342898, 0.1344396, - 0.68642473, 0.37953874, 0.32575172, 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}); + {0.56637216f, 0.90457034f, 0.69827306f, 0.4353543f, 0.47985056f, 0.42658508f, 0.14516132f, 0.08081771f, + 0.1799732f, 0.9229515f, 0.42420176f, 0.50857586f, 0.82664067f, 0.4972319f, 0.3752427f, 0.56731623f, + 0.18241242f, 0.33252355f, 0.30608943f, 0.6572437f, 0.69185436f, 0.88646156f, 0.36985755f, 0.5590753f, + 0.5256446f, 0.03342898f, 0.1344396f, 0.68642473f, 0.37953874f, 0.32575172f, 0.21108444f, 0.5661886f, + 0.45378175f, 0.62126315f, 0.26799858f, 0.37272978f}); // deltas test_case.add_input( Shape{1, 12, 2, 6}, - {0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, 0.5157072, 0.9932138, 0.7565954, - 0.43803605, 0.802818, 0.14834064, 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, - 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, 0.95615596, 0.5651084, 0.11801951, - 0.05352486, 0.9774733, 0.14439464, 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, - 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, 0.39688024, 0.3286012, 0.5117038, - 0.04743988, 0.9408995, 0.29885054, 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, - 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, 0.27721855, 0.6378373, 0.22795396, - 0.4961256, 0.3215895, 0.15607187, 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, - 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, 0.20689541, 0.37622106, 0.1083321, - 0.1342548, 0.05815459, 0.7676379, 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, - 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, 0.05307213, 0.39705503, 0.21003333, - 0.5625373, 0.66188884, 0.80521655, 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, - 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, 0.36377597, 0.7891322, 0.40900692, - 0.99139464, 0.50169915, 0.41435778, 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, - 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, 0.97486794, 0.2921785, 0.7633071, - 0.88477814, 0.03563205, 0.50833166, 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}); + {0.5337073f, 0.86607957f, 0.55151343f, 0.21626699f, 0.4462629f, 0.03985678f, 0.5157072f, 0.9932138f, + 0.7565954f, 0.43803605f, 0.802818f, 0.14834064f, 0.53932905f, 0.14314f, 0.3817048f, 0.95075196f, + 0.05516243f, 0.2567484f, 0.25508744f, 0.77438325f, 0.43561f, 0.2094628f, 
0.8299043f, 0.44982538f, + 0.95615596f, 0.5651084f, 0.11801951f, 0.05352486f, 0.9774733f, 0.14439464f, 0.62644225f, 0.14370479f, + 0.54161614f, 0.557915f, 0.53102225f, 0.0840179f, 0.7249888f, 0.9843559f, 0.5490522f, 0.53788143f, + 0.822474f, 0.3278008f, 0.39688024f, 0.3286012f, 0.5117038f, 0.04743988f, 0.9408995f, 0.29885054f, + 0.81039643f, 0.85277915f, 0.06807619f, 0.86430097f, 0.36225632f, 0.16606331f, 0.5401001f, 0.7541649f, + 0.11998601f, 0.5131829f, 0.40606487f, 0.327888f, 0.27721855f, 0.6378373f, 0.22795396f, 0.4961256f, + 0.3215895f, 0.15607187f, 0.14782153f, 0.8908137f, 0.8835288f, 0.834191f, 0.29907143f, 0.7983525f, + 0.755875f, 0.30837986f, 0.0839176f, 0.26624718f, 0.04371626f, 0.09472824f, 0.20689541f, 0.37622106f, + 0.1083321f, 0.1342548f, 0.05815459f, 0.7676379f, 0.8105144f, 0.92348766f, 0.26761323f, 0.7183306f, + 0.8947588f, 0.19020908f, 0.42731014f, 0.7473663f, 0.85775334f, 0.9340091f, 0.3278848f, 0.755993f, + 0.05307213f, 0.39705503f, 0.21003333f, 0.5625373f, 0.66188884f, 0.80521655f, 0.6125863f, 0.44678232f, + 0.97802377f, 0.0204936f, 0.02686367f, 0.7390654f, 0.74631f, 0.58399844f, 0.5988792f, 0.37413648f, + 0.5946692f, 0.6955776f, 0.36377597f, 0.7891322f, 0.40900692f, 0.99139464f, 0.50169915f, 0.41435778f, + 0.17142445f, 0.26761186f, 0.31591868f, 0.14249913f, 0.12919712f, 0.5418711f, 0.6523203f, 0.50259084f, + 0.7379765f, 0.01171071f, 0.94423133f, 0.00841132f, 0.97486794f, 0.2921785f, 0.7633071f, 0.88477814f, + 0.03563205f, 0.50833166f, 0.01354555f, 0.535081f, 0.41366324f, 0.0694767f, 0.9944055f, 0.9981207f}); // im_info test_case.add_input(Shape{1, 3}, {200, 200, 0}); // anchors @@ -623,11 +627,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { test_case.add_expected_output( Shape{6, 4}, - {0.12904608, 1.3703424, 3.6230984, 3.4675088, 0.9725206, 0., 4.4917974, 4.9623675, - 4.882682, 5.1236916, 7.1700497, 10.213073, 4.4913187, 4.305372, 8.750267, 8.803502, - 0.9777608, 1.0317986, 3.228293, 4.495021, 4.125554, 5.4091997, 6.35439, 10.124915}); + {0.12904608f, 1.3703424f, 3.6230984f, 3.4675088f, 0.9725206f, 0., 4.4917974f, 4.9623675f, + 4.882682f, 5.1236916f, 7.1700497f, 10.213073f, 4.4913187f, 4.305372f, 8.750267f, 8.803502f, + 0.9777608f, 1.0317986f, 3.228293f, 4.495021f, 4.125554f, 5.4091997f, 6.35439f, 10.124915f}); test_case.add_expected_output(Shape{6}, - {0.9229515, 0.90457034, 0.88646156, 0.82664067, 0.69827306, 0.69185436}); + {0.9229515f, 0.90457034f, 0.88646156f, 0.82664067f, 0.69827306f, 0.69185436f}); test_case.add_expected_output(Shape{1}, {6}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp index 110fbe656614de..d57b9c6ad64b32 100644 --- a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp @@ -34,23 +34,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_nchw) { "onnx/org.pytorch/adaptive_avg_pooling2d_nchw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.9945, - 0.3466, - 0.2894, - 0.9318, - 0.0115, - 0.4867, - 0.7608, - 0.1550, - 0.8485, - 0.4971, - 0.8833, - 0.4579, - 0.3673, - 0.5410, - 0.2004, - 0.1519}); - test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249, 0.5342500, 0.5634750, 0.4233750}); + test_case.add_input({0.9945f, + 0.3466f, + 0.2894f, + 0.9318f, + 0.0115f, + 0.4867f, + 0.7608f, + 0.1550f, + 0.8485f, + 0.4971f, + 0.8833f, + 0.4579f, + 0.3673f, + 0.5410f, + 0.2004f, + 0.1519f}); + 
test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249f, 0.5342500f, 0.5634750f, 0.4233750f}); test_case.run(); } @@ -61,8 +61,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_chw) { "onnx/org.pytorch/adaptive_avg_pooling2d_chw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({12.0, -1.0, -56.0, 20.0, 1.0, -8.0, 7.0, 9.0}); + test_case.add_input({12.0f, -1.0f, -56.0f, 20.0f, 1.0f, -8.0f, 7.0f, 9.0f}); - test_case.add_expected_output(Shape{1, 2, 2}, {5.5, -18.0, -3.5, 8.0}); + test_case.add_expected_output(Shape{1, 2, 2}, {5.5f, -18.0f, -3.5f, 8.0f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_quant.in.cpp b/src/frontends/onnx/tests/onnx_import_quant.in.cpp index 784bb8a0ae2ad3..d49b96c22b7f91 100644 --- a/src/frontends/onnx/tests/onnx_import_quant.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_quant.in.cpp @@ -1061,7 +1061,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_const_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const auto n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); @@ -1081,7 +1081,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_nonconst_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const size_t n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); diff --git a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp index 771000f8091db8..645ccd8cc4cea7 100644 --- a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp @@ -129,29 +129,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_reshape_negative_dim) { "onnx/reshape_negative_dim.onnx")); // 2x3x4 - auto input = test::NDArray({{{0.5488135, 0.71518934, 0.60276335, 0.5448832}, - {0.4236548, 0.6458941, 0.4375872, 0.891773}, - {0.96366274, 0.3834415, 0.79172504, 0.5288949}}, + auto input = test::NDArray({{{0.5488135f, 0.71518934f, 0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f, 0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f, 0.79172504f, 0.5288949f}}, - {{0.56804454, 0.92559665, 0.07103606, 0.0871293}, - {0.0202184, 0.83261985, 0.77815676, 0.87001216}, - {0.9786183, 0.7991586, 0.46147937, 0.7805292}}}) + {{0.56804454f, 0.92559665f, 0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f, 0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f, 0.46147937f, 0.7805292f}}}) .get_vector(); // 2x6x2 - auto expected_output = test::NDArray({{{0.5488135, 0.71518934}, - {0.60276335, 0.5448832}, - {0.4236548, 0.6458941}, - {0.4375872, 0.891773}, - {0.96366274, 0.3834415}, - {0.79172504, 0.5288949}}, - - {{0.56804454, 0.92559665}, - {0.07103606, 0.0871293}, - {0.0202184, 0.83261985}, - {0.77815676, 0.87001216}, - {0.9786183, 0.7991586}, - {0.46147937, 0.7805292}}}) + auto expected_output = test::NDArray({{{0.5488135f, 0.71518934f}, + {0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f}, + {0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f}, + {0.79172504f, 0.5288949f}}, + + {{0.56804454f, 0.92559665f}, + {0.07103606f, 0.0871293f}, + {0.0202184f, 
0.83261985f}, + {0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f}, + {0.46147937f, 0.7805292f}}}) .get_vector(); auto test_case = test::TestCase(function, s_device); @@ -207,7 +207,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -224,7 +224,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_v1) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space_v1.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -242,7 +242,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_crd) { "onnx/depth_to_space_crd.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 4.f, 1.f, 5.f, 8.f, 12.f, 9.f, 13.f, 2.f, 6.f, 3.f, 7.f, 10.f, 14.f, 11.f, 15.f, 16.f, 20.f, 17.f, 21.f, 24.f, 28.f, @@ -304,7 +304,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/space_to_depth.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index fbe25a2b0b0227..06b28aa76a88e6 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -39,12 +39,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_default_const) { "onnx/lstm_fwd_default_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.063373, -0.20347191, -0.07230289, -0.13298286}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289, -0.13298286}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954, -0.24502525}); // Y_c_data + {-0.063373f, -0.20347191f, -0.07230289f, -0.13298286f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289f, -0.13298286f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954f, -0.24502525f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -55,12 +55,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_const) { "onnx/lstm_reverse_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.06082131, -0.19985214, 0.00860566, 0.00920492}); // Y_data - 
test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131, -0.19985214}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438, -0.3832652}); // Y_c_data + {-0.06082131f, -0.19985214f, 0.00860566f, 0.00920492f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131f, -0.19985214f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -70,21 +70,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_const) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/lstm_bidir_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 2, 1, 2}, - {-0.063373, - -0.20347191, - -0.06082131, - -0.19985214, - -0.07230289, - -0.13298286, - 0.00860566, - 0.00920492}); // Y_data + {-0.063373f, + -0.20347191f, + -0.06082131f, + -0.19985214f, + -0.07230289f, + -0.13298286f, + 0.00860566f, + 0.00920492f}); // Y_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.07230289, -0.13298286, -0.06082131, -0.19985214}); // Y_h_data + {-0.07230289f, -0.13298286f, -0.06082131f, -0.19985214f}); // Y_h_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.1557954, -0.24502525, -0.25917438, -0.3832652}); // Y_c_data + {-0.1557954f, -0.24502525f, -0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -95,12 +95,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_const) { "onnx/lstm_fwd_clip_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.02391884, -0.02744377, -0.01024176, -0.01188637}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176, -0.01188637}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271, -0.02353566}); // Y_c_data + {-0.02391884f, -0.02744377f, -0.01024176f, -0.01188637f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176f, -0.01188637f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271f, -0.02353566f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -111,27 +111,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_mixed_seq_const) { "onnx/lstm_fwd_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14769037, - -0.16327181, - -0.10419653, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14769037f, + -0.16327181f, + -0.10419653f, 0., 0., 0., - 0.08759661, - -0.04002844, - -0.08617793}); // Y_data + 0.08759661f, + -0.04002844f, + -0.08617793f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.08759661, -0.04002844, -0.08617793}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.08759661f, -0.04002844f, -0.08617793f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - 
{0.367563, -0.43762812, -0.20435227, 0.17330585, -0.0732716, -0.18809439}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.17330585f, -0.0732716f, -0.18809439f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -142,27 +142,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_mixed_seq_const) { "onnx/lstm_reverse_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14696799, - -0.15571019, - -0.10270946, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14696799f, + -0.15571019f, + -0.10270946f, 0., 0., 0., - -0.01110403, - 0.0228607, - 0.00397353}); // Y_data + -0.01110403f, + 0.0228607f, + 0.00397353f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.14696799, -0.15571019, -0.10270946}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.14696799f, -0.15571019f, -0.10270946f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.367563, -0.43762812, -0.20435227, 0.50598085, -0.42627674, -0.3641275}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.50598085f, -0.42627674f, -0.3641275f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -174,43 +174,43 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_mixed_seq_const) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {0.68172926, 1.1405563, -0.03931177, -0.03759607, 1.1397027, 0.60444903, 1.3246384, -0.28191715}); // X + {0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f, 1.1397027f, 0.60444903f, 1.3246384f, -0.28191715f}); // X test_case.add_expected_output(Shape{2, 2, 2, 2}, - {-0.063373, - -0.20347191, - 0.00860566, - 0.00920492, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421, + {-0.063373f, + -0.20347191f, + 0.00860566f, + 0.00920492f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f, 0., 0., - -0.19095606, - -0.12459831, + -0.19095606f, + -0.12459831f, 0., 0., - -0.1911628, - -0.12813942}); // Y_data + -0.1911628f, + -0.12813942f}); // Y_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.063373, - -0.20347191, - -0.19095606, - -0.12459831, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421}); // Y_h_data + {-0.063373f, + -0.20347191f, + -0.19095606f, + -0.12459831f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f}); // Y_h_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.2732999, - -0.38956356, - -0.48170844, - -0.34701264, - -0.2732999, - -0.38956356, - -0.27130172, - -0.253659}); // Y_c_data + {-0.2732999f, + -0.38956356f, + -0.48170844f, + -0.34701264f, + -0.2732999f, + -0.38956356f, + -0.27130172f, + -0.253659f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -221,8 +221,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_peepholes) { "onnx/lstm_fwd_with_clip_peepholes.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-0.455351, -0.276391, -0.185934, -0.269585}); // X - test_case.add_input({-0.494659f, // W + test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); // X + test_case.add_input({-0.494659f, // W 0.0453352f, -0.487793f, 0.417264f, @@ -560,10 +560,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_dynamic_batch_size_and_seq_len) { 
test_case.add_input({1, 2, 3, 4, 5, 6}); test_case.add_expected_output(Shape{1, 1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y test_case.add_expected_output(Shape{1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y_c - test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y_c + test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -769,32 +769,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_activations_con // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{0.30736187, 0.10271017, 0.91698503, 0.3471303, -0.0123809, 0.51264125, 0.51235366, - 0.45471948, 0.50601995, 0.49260828, 0.4781971, 0.0668709, 0.89421916, 0.33762455, - -0.19021586, 0.6881336, 0.7331965, 0.8887774, 0.34048334, 0.38408905, 0.49962956, - 0.2948451, 0.3651103, 0.33406913, 0.57418096, 0.49882296, 0.4321446, 0.97142136, - 0.20714557, 0.66270787, 0.53192705, 0.46424377, 0.9647801, 0.19583187, 0.7362316, - 0.48205143, -0.04748845, 0.27395952, 0.35897565, 0.5801568, 0.5889811, 0.36110958, - 1.3433081, 0.29702073, 0.5709667, 0.936689, 0.84129435, 1.1782551, 0.23925206, - 0.57521456, 0.43502977, -0.5664091, 0.6758457, 0.2958132, 0.70932186, 0.4411352, - -0.1717428, 1.7761463, 0.14413449, 0.73801273}); + std::vector{0.30736187f, 0.10271017f, 0.91698503f, 0.3471303f, -0.0123809f, 0.51264125f, 0.51235366f, + 0.45471948f, 0.50601995f, 0.49260828f, 0.4781971f, 0.0668709f, 0.89421916f, 0.33762455f, + -0.19021586f, 0.6881336f, 0.7331965f, 0.8887774f, 0.34048334f, 0.38408905f, 0.49962956f, + 0.2948451f, 0.3651103f, 0.33406913f, 0.57418096f, 0.49882296f, 0.4321446f, 0.97142136f, + 0.20714557f, 0.66270787f, 0.53192705f, 0.46424377f, 0.9647801f, 0.19583187f, 0.7362316f, + 0.48205143f, -0.04748845f, 0.27395952f, 0.35897565f, 0.5801568f, 0.5889811f, 0.36110958f, + 1.3433081f, 0.29702073f, 0.5709667f, 0.936689f, 0.84129435f, 1.1782551f, 0.23925206f, + 0.57521456f, 0.43502977f, -0.5664091f, 0.6758457f, 0.2958132f, 0.70932186f, 0.4411352f, + -0.1717428f, 1.7761463f, 0.14413449f, 0.73801273f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{0.936689, - 0.84129435, - 1.1782551, - 0.23925206, - 0.57521456, - 0.43502977, - -0.5664091, - 0.6758457, - 0.2958132, - 0.70932186, - 0.4411352, - -0.1717428, - 1.7761463, - 0.14413449, - 0.73801273}); + std::vector{0.936689f, + 0.84129435f, + 1.1782551f, + 0.23925206f, + 0.57521456f, + 0.43502977f, + -0.5664091f, + 0.6758457f, + 0.2958132f, + 0.70932186f, + 0.4411352f, + -0.1717428f, + 1.7761463f, + 0.14413449f, + 0.73801273f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5); } @@ -908,32 +908,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_mixed_seq_len_c // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9559332, 0.4372494, 0.9967716, -0.9079381, -1.2538278, 1.9265908, -0.8437393, - -1.2057271, -0.25887525, -0.52679026, -0.3619178, 0.67928517, 0.9486744, -0.12006134, - -1.3862017, -0.98941356, 0.80389524, 0.97586197, -0.9343586, -0.74858856, 1.797039, - -0.7873732, -0.72469383, -0.5866635, -0.42103744, -0.8406298, 0.85877097, 0.6349921, - -0.55897295, -0.6168443, 0., 0., 0., 0., 0., - 1.577129, -0.6935871, -0.304804, -0.75392795, -0.20703818, -0.93796504, 0.9220495, - 0.36017662, -0.7007159, 0.06962098, 
0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.96323603, - 0.9265786, 0.54976916, -0.8037839, 0.73501444}); + std::vector{-0.9559332f, 0.4372494f, 0.9967716f, -0.9079381f, -1.2538278f, 1.9265908f, + -0.8437393f, -1.2057271f, -0.25887525f, -0.52679026f, -0.3619178f, 0.67928517f, + 0.9486744f, -0.12006134f, -1.3862017f, -0.98941356f, 0.80389524f, 0.97586197f, + -0.9343586f, -0.74858856f, 1.797039f, -0.7873732f, -0.72469383f, -0.5866635f, + -0.42103744f, -0.8406298f, 0.85877097f, 0.6349921f, -0.55897295f, -0.6168443f, + 0., 0., 0., 0., 0., 1.577129f, + -0.6935871f, -0.304804f, -0.75392795f, -0.20703818f, -0.93796504f, 0.9220495f, + 0.36017662f, -0.7007159f, 0.06962098f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.96323603f, 0.9265786f, 0.54976916f, -0.8037839f, 0.73501444f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.98941356, - 0.80389524, - 0.97586197, - -0.9343586, - -0.74858856, - 1.577129, - -0.6935871, - -0.304804, - -0.75392795, - -0.20703818, - -0.96323603, - 0.9265786, - 0.54976916, - -0.8037839, - 0.73501444}); + std::vector{-0.98941356f, + 0.80389524f, + 0.97586197f, + -0.9343586f, + -0.74858856f, + 1.577129f, + -0.6935871f, + -0.304804f, + -0.75392795f, + -0.20703818f, + -0.96323603f, + 0.9265786f, + 0.54976916f, + -0.8037839f, + 0.73501444f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -949,32 +950,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9917215, 0.07583051, 0.997975, -0.9315585, -0.7483002, 1.536813, -0.59922504, - -0.33637103, -0.7565539, -0.23930266, -0.7844553, 1.0393485, 0.73516595, -0.5616293, - -0.09489207, -0.9501128, 0.7905356, 0.9928266, -0.9153729, -1.1781745, 1.7955453, - -0.77754307, -0.6831806, -0.6266324, -0.39791372, -0.8030517, 1.3107346, 0.3700709, - -0.49808976, 0.52939236, 0., 0., 0., 0., 0., - 1.9345565, -0.83817405, -1.1433047, -0.35640514, -0.5191339, -0.655544, 1.3520991, - 0.42289692, -0.3171452, -0.3922639, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.24612205, - 1.6415757, 0.79883975, -0.18640287, -1.0134869}); + std::vector{-0.9917215f, 0.07583051f, 0.997975f, -0.9315585f, -0.7483002f, 1.536813f, + -0.59922504f, -0.33637103f, -0.7565539f, -0.23930266f, -0.7844553f, 1.0393485f, + 0.73516595f, -0.5616293f, -0.09489207f, -0.9501128f, 0.7905356f, 0.9928266f, + -0.9153729f, -1.1781745f, 1.7955453f, -0.77754307f, -0.6831806f, -0.6266324f, + -0.39791372f, -0.8030517f, 1.3107346f, 0.3700709f, -0.49808976f, 0.52939236f, + 0., 0., 0., 0., 0., 1.9345565f, + -0.83817405f, -1.1433047f, -0.35640514f, -0.5191339f, -0.655544f, 1.3520991f, + 0.42289692f, -0.3171452f, -0.3922639f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.24612205f, 1.6415757f, 0.79883975f, -0.18640287f, -1.0134869f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.9917215, - 0.07583051, - 0.997975, - -0.9315585, - -0.7483002, - 1.536813, - -0.59922504, - -0.33637103, - -0.7565539, - -0.23930266, - -0.7844553, - 1.0393485, - 0.73516595, - -0.5616293, - -0.09489207}); + std::vector{-0.9917215f, + 0.07583051f, + 0.997975f, + -0.9315585f, + -0.7483002f, + 1.536813f, + -0.59922504f, + -0.33637103f, + -0.7565539f, + -0.23930266f, + -0.7844553f, + 1.0393485f, + 0.73516595f, + -0.5616293f, + -0.09489207f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -991,29 +993,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - 
-0.3224981, -0.44282594, 0.7499796, -0.12240417, 0.12079421, 0.02534254, 0.02504561, -0.0463777, - 0.01204535, -0.01497037, -0.04651929, -0.6264307, 0.7236632, 0.06250653, 0.02594197, 0.0595789, - 0.40258542, -0.40646964, 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088, -0.0833023, -0.40062034, - 0.7579466, -0.12340625, 0.04415433, -0.24662054, 0.27420586, -0.09122991, -0.22768986, 0.19980887, - -0.218649, -0.5560231, 0.56177044, -0.25098884, 0.15462328, 0.0409361, 0.17866893, -0.2782218, - 0.27396634, -0.04992082, 0.15353821, -0.4497267, -0.44631857, -0.478926, -0.23017275, 0.25369287, - -0.7369056, -0.73285, -0.5750758, -0.533177, 0., 0., 0., 0., - 0., -0.45753813, 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.18096107, -0.24415034, - 0.38435352, -0.29881003, 0.07738188, 0., 0., 0., 0., 0., - 0.10390212, -0.29646862, -0.20532897, -0.31521815, 0.01049522, 0.19370168, -0.6386781, -0.42919028, - -0.47081998, -0.2954276, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., -0.50112087, -0.11085765, 0.5155622, -0.5635352, - 0.54762024, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0.17058733, -0.6941011, -0.27862304, -0.27050856, -0.03864266}); + -0.3224981f, -0.44282594f, 0.7499796f, -0.12240417f, 0.12079421f, 0.02534254f, 0.02504561f, + -0.0463777f, 0.01204535f, -0.01497037f, -0.04651929f, -0.6264307f, 0.7236632f, 0.06250653f, + 0.02594197f, 0.0595789f, 0.40258542f, -0.40646964f, 0.70320284f, -0.02962421f, 0.10372428f, + -0.38378227f, -0.4331268f, -0.15696645f, -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, + 0.60705113f, -0.6336088f, -0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, + -0.24662054f, 0.27420586f, -0.09122991f, -0.22768986f, 0.19980887f, -0.218649f, -0.5560231f, + 0.56177044f, -0.25098884f, 0.15462328f, 0.0409361f, 0.17866893f, -0.2782218f, 0.27396634f, + -0.04992082f, 0.15353821f, -0.4497267f, -0.44631857f, -0.478926f, -0.23017275f, 0.25369287f, + -0.7369056f, -0.73285f, -0.5750758f, -0.533177f, 0., 0., 0., + 0., 0., -0.45753813f, 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, + -0.18096107f, -0.24415034f, 0.38435352f, -0.29881003f, 0.07738188f, 0., 0., + 0., 0., 0., 0.10390212f, -0.29646862f, -0.20532897f, -0.31521815f, + 0.01049522f, 0.19370168f, -0.6386781f, -0.42919028f, -0.47081998f, -0.2954276f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., -0.50112087f, -0.11085765f, 0.5155622f, -0.5635352f, 0.54762024f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0.17058733f, -0.6941011f, -0.27862304f, -0.27050856f, + -0.03864266f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.0833023, -0.40062034, 0.7579466, -0.12340625, 0.04415433, -0.45753813, - 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.50112087, -0.11085765, - 0.5155622, -0.5635352, 0.54762024, 0.0595789, 0.40258542, -0.40646964, - 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088}); + std::vector{-0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, -0.45753813f, + 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, -0.50112087f, -0.11085765f, + 0.5155622f, -0.5635352f, 0.54762024f, 0.0595789f, 0.40258542f, -0.40646964f, + 0.70320284f, -0.02962421f, 0.10372428f, -0.38378227f, -0.4331268f, -0.15696645f, + -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, 0.60705113f, -0.6336088f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1901,32 +1906,32 @@ 
NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.27398264, 0.96948624, 0.26404798, 0.8068119, 0.99935544, 0.73694086, 0.44305325, - -0.9964632, 0.7063714, 0.9999049, -0.7241098, 0.08538079, -0.785874, 0.60833323, - 0.99999666, 0.53703666, 0.0267657, 0.37151086, -0.68740594, 0.9992448, 0.3254757, - 0.7716811, -0.9996745, 0.9957807, 0.9995338, 0.9997339, 0.9888724, -0.8992324, - -0.797282, 0.98666525, 0., 0., 0., 0., 0., - 0.95711637, -0.8986079, -0.99998885, 0.96265936, 0.9380511, -0.86523867, 0.3528558, - -0.99675506, 0.946875, 0.79539406, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0.99903, - 0.9998094, 0.9499353, 0.6077225, -0.9921822}); + std::vector{ + -0.27398264f, 0.96948624f, 0.26404798f, 0.8068119f, 0.99935544f, 0.73694086f, 0.44305325f, -0.9964632f, + 0.7063714f, 0.9999049f, -0.7241098f, 0.08538079f, -0.785874f, 0.60833323f, 0.99999666f, 0.53703666f, + 0.0267657f, 0.37151086f, -0.68740594f, 0.9992448f, 0.3254757f, 0.7716811f, -0.9996745f, 0.9957807f, + 0.9995338f, 0.9997339f, 0.9888724f, -0.8992324f, -0.797282f, 0.98666525f, 0., 0., + 0., 0., 0., 0.95711637f, -0.8986079f, -0.99998885f, 0.96265936f, 0.9380511f, + -0.86523867f, 0.3528558f, -0.99675506f, 0.946875f, 0.79539406f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0.99903f, + 0.9998094f, 0.9499353f, 0.6077225f, -0.9921822f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.27398264, - 0.96948624, - 0.26404798, - 0.8068119, - 0.99935544, - 0.73694086, - 0.44305325, - -0.9964632, - 0.7063714, - 0.9999049, - -0.7241098, - 0.08538079, - -0.785874, - 0.60833323, - 0.99999666}); + std::vector{-0.27398264f, + 0.96948624f, + 0.26404798f, + 0.8068119f, + 0.99935544f, + 0.73694086f, + 0.44305325f, + -0.9964632f, + 0.7063714f, + 0.9999049f, + -0.7241098f, + 0.08538079f, + -0.785874f, + 0.60833323f, + 0.99999666f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1942,29 +1947,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - 0.02254748, 0.15776646, -0.8229023, 0.19205809, 0.76984656, -0.00603169, -0.0286147, 0.04512155, - -0.0011912, -0.02572936, -0.13703543, -0.49651444, -0.78868157, 0.3566854, 0.8758509, -0.99602485, - -0.8151508, -0.5803147, 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735, -0.43823165, -0.1904698, - 0.8320786, 0.9830735, 0.61861455, 0.19109797, 0.64407, 0.00962067, -0.32752877, -0.5050589, - -0.23455954, 0.9517933, 0.9050665, 0.91091585, -0.77941567, -0.71390504, -0.24422187, -0.38115412, - 0.3462553, 0.44084883, -0.81455964, -0.23556596, 0.85043025, -0.7840209, -0.82087713, -0.8349008, - -0.7880142, 0.99017143, -0.9816452, -0.93827677, 0., 0., 0., 0., - 0., 0.28117967, 0.20685148, 0.01166701, -0.5441828, -0.5463747, -0.85301256, 0.52109087, - -0.8317892, -0.9676957, -0.30258918, 0., 0., 0., 0., 0., - -0.7010546, -0.3106169, -0.04788882, -0.21822351, -0.33518708, -0.9073148, 0.16276085, 0.9518349, - -0.8635942, -0.92539954, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0.9948462, -0.6242633, -0.19065344, -0.36072153, - -0.99407107, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., -0.9957684, -0.7924, -0.40261805, -0.34061068, -0.55580306}); + 0.02254748f, 0.15776646f, -0.8229023f, 0.19205809f, 0.76984656f, -0.00603169f, -0.0286147f, + 0.04512155f, -0.0011912f, -0.02572936f, -0.13703543f, -0.49651444f, 
-0.78868157f, 0.3566854f, + 0.8758509f, -0.99602485f, -0.8151508f, -0.5803147f, 0.4985683f, 0.30210292f, 0.11550081f, + -0.30236644f, 0.99622667f, -0.8732492f, -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, + -0.18373811f, 0.8451735f, -0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, + 0.19109797f, 0.64407f, 0.00962067f, -0.32752877f, -0.5050589f, -0.23455954f, 0.9517933f, + 0.9050665f, 0.91091585f, -0.77941567f, -0.71390504f, -0.24422187f, -0.38115412f, 0.3462553f, + 0.44084883f, -0.81455964f, -0.23556596f, 0.85043025f, -0.7840209f, -0.82087713f, -0.8349008f, + -0.7880142f, 0.99017143f, -0.9816452f, -0.93827677f, 0., 0., 0., + 0., 0., 0.28117967f, 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, + -0.85301256f, 0.52109087f, -0.8317892f, -0.9676957f, -0.30258918f, 0., 0., + 0., 0., 0., -0.7010546f, -0.3106169f, -0.04788882f, -0.21822351f, + -0.33518708f, -0.9073148f, 0.16276085f, 0.9518349f, -0.8635942f, -0.92539954f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0.9948462f, -0.6242633f, -0.19065344f, -0.36072153f, -0.99407107f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., -0.9957684f, -0.7924f, -0.40261805f, -0.34061068f, + -0.55580306f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.43823165, -0.1904698, 0.8320786, 0.9830735, 0.61861455, 0.28117967, - 0.20685148, 0.01166701, -0.5441828, -0.5463747, 0.9948462, -0.6242633, - -0.19065344, -0.36072153, -0.99407107, -0.99602485, -0.8151508, -0.5803147, - 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735}); + std::vector{-0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, 0.28117967f, + 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, 0.9948462f, -0.6242633f, + -0.19065344f, -0.36072153f, -0.99407107f, -0.99602485f, -0.8151508f, -0.5803147f, + 0.4985683f, 0.30210292f, 0.11550081f, -0.30236644f, 0.99622667f, -0.8732492f, + -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, -0.18373811f, 0.8451735f}); // loosest match @ mantissa bit: // 16 or next bit (0.01166688557714223862 vs 0.01166701037436723709) diff --git a/src/frontends/onnx/tests/onnx_transformations.cpp b/src/frontends/onnx/tests/onnx_transformations.cpp index 76420f41e314c3..ca4535c921ff76 100644 --- a/src/frontends/onnx/tests/onnx_transformations.cpp +++ b/src/frontends/onnx/tests/onnx_transformations.cpp @@ -42,7 +42,7 @@ bool after_func_expand_name_comp(std::string lhs, std::string rhs) { if (is_hex_symbol(name[i])) { ++founded_hex; if (cut_begin == -1) { - cut_begin = i; + cut_begin = static_cast(i); } if (founded_hex >= min_address) { cut_length = founded_hex; diff --git a/src/frontends/paddle/tests/CMakeLists.txt b/src/frontends/paddle/tests/CMakeLists.txt index cdf879a94eb849..8945f7e083d82e 100644 --- a/src/frontends/paddle/tests/CMakeLists.txt +++ b/src/frontends/paddle/tests/CMakeLists.txt @@ -25,10 +25,21 @@ ov_add_test_target( ) # Test model generating -ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" - MESSAGE_MODE WARNING - WARNING_MESSAGE "PaddlePaddle frontend unit tests will be skipped" - RESULT_VAR paddlepaddle_FOUND) +set(PADDLE_REQ "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt") +if(PYTHONINTERP_FOUND) + execute_process( + COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/paddle_pip_check.py" ${PADDLE_REQ} + RESULT_VARIABLE EXIT_CODE + OUTPUT_VARIABLE OUTPUT_TEXT + ERROR_VARIABLE ERROR_TEXT) +endif() + +if(NOT EXIT_CODE EQUAL 0) + set(paddlepaddle_FOUND OFF) + 
message(WARNING "Python requirement file ${PADDLE_REQ} is not installed, PaddlePaddle frontend unit tests will be skipped") +else() + set(paddlepaddle_FOUND ON) +endif() set(TEST_PADDLE_MODELS_DIRNAME test_model_zoo/paddle_test_models) target_compile_definitions(${TARGET_NAME} PRIVATE -D TEST_PADDLE_MODELS_DIRNAME=\"${TEST_PADDLE_MODELS_DIRNAME}/\") diff --git a/src/frontends/paddle/tests/paddle_pip_check.py b/src/frontends/paddle/tests/paddle_pip_check.py new file mode 100644 index 00000000000000..4c7f860202b4f0 --- /dev/null +++ b/src/frontends/paddle/tests/paddle_pip_check.py @@ -0,0 +1,20 @@ +import pkg_resources +import re +import sys + +req_file=sys.argv[1] + +try: + pkg_resources.require(open(req_file, mode='r')) +except Exception as inst: + pattern = re.compile(r"protobuf .*, Requirement.parse\('protobuf<=3\.20\.0,>=3\.1\.0'\), {'paddlepaddle'}") + result = pattern.findall(str(inst)) + if len(result) == 0: + raise inst + else: + env = pkg_resources.Environment() + env['protobuf'].clear() + env.add(pkg_resources.DistInfoDistribution(project_name="protobuf", version="3.20.0")) + ws = pkg_resources.working_set + reqs = pkg_resources.parse_requirements(open(req_file, mode='r')) + dists = ws.resolve(reqs, env, replace_conflicting=True) diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp index 123f24fd4a16a1..9bd62ada8ff706 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp @@ -60,7 +60,7 @@ class PYTORCH_API FrontEnd : public ov::frontend::FrontEnd { bool supported_impl(const std::vector& variants) const override; ov::frontend::InputModel::Ptr load_impl(const std::vector& variants) const override; - std::map m_op_translators; + std::map m_op_translators; }; } // namespace pytorch diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp index 41205130137c09..a3c5504c5c0f3a 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp @@ -19,20 +19,22 @@ typedef std::unordered_map> TensorMap; class NodeContext : public frontend::NodeContext { public: NodeContext(std::shared_ptr decoder, - TensorMap* tensor_map, - ParameterVector* external_parameters, const TensorMap& ext_tensor_map, + std::shared_ptr tensor_map, + std::shared_ptr external_parameters, + std::shared_ptr> mutated_tensors, TranslateSession* translate_session) : frontend::NodeContext(decoder->get_op_type()), m_decoder(decoder), - m_tensor_map(tensor_map), m_ext_tensor_map(ext_tensor_map), + m_tensor_map(tensor_map), m_external_parameters(external_parameters), + m_mutated_tensors(mutated_tensors), m_translate_session(translate_session), m_decoder_inputs(decoder->inputs()), m_decoder_outputs(decoder->outputs()) { - FRONT_END_GENERAL_CHECK(tensor_map != nullptr && external_parameters != nullptr && - translate_session != nullptr); + FRONT_END_GENERAL_CHECK(m_tensor_map != nullptr && m_external_parameters != nullptr && + m_mutated_tensors != nullptr && m_translate_session != nullptr); } // Do not search for input in tensor map; try to access it as a constant of specified type T and return its value @@ -106,11 +108,7 @@ class NodeContext : public frontend::NodeContext { "There is no any named attributes in PyTorch node, query 
by attribute name is not implemented"); } - void mutate_input(size_t index, Output ov_output); - - std::set get_mutated_tensors() const { - return m_mutated_tensors; - } + void mutate_input(size_t index, Output ov_output) const; std::shared_ptr get_decoder() const { return m_decoder; @@ -120,7 +118,7 @@ class NodeContext : public frontend::NodeContext { return m_translate_session; } - void add_tensor_to_context(size_t index, Output ov_output); + void add_tensor_to_context(size_t index, Output ov_output) const; Output get_tensor_from_model(size_t index) const { if (m_tensor_map->find(index) != m_tensor_map->end()) { @@ -130,22 +128,22 @@ class NodeContext : public frontend::NodeContext { } } - Output get_tensor_from_model_or_create_input(size_t index); + Output get_tensor_from_model_or_create_input(size_t index) const; Output get_input_from_visible_context(size_t index) const; - std::shared_ptr convert_subgraph(size_t index); + std::shared_ptr convert_subgraph(size_t index) const; private: std::shared_ptr m_decoder; - std::set m_mutated_tensors; - TensorMap* m_tensor_map; const TensorMap& m_ext_tensor_map; - ParameterVector* m_external_parameters; - TranslateSession* m_translate_session; + std::shared_ptr m_tensor_map; + std::shared_ptr m_external_parameters; + std::shared_ptr> m_mutated_tensors; + TranslateSession* m_translate_session = nullptr; const std::vector m_decoder_inputs; const std::vector m_decoder_outputs; }; -using PytorchCreatorFunction = std::function; +using CreatorFunction = std::function; } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index 10ac4fc5d005e9..49495749d570f4 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -42,16 +42,16 @@ std::shared_ptr NodeContext::mark_node(std::shared_ptr ov_node) cons return m_decoder->mark_node(ov_node); } -void NodeContext::mutate_input(size_t index, Output ov_output) { +void NodeContext::mutate_input(size_t index, Output ov_output) const { FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index), "Input is none with index: ", index); auto input_id = m_decoder_inputs.at(index); FRONT_END_GENERAL_CHECK(m_tensor_map->count(input_id), "No tensor corresponding input: ", input_id, " exist."); m_translate_session->encode_tensor_name(ov_output, input_id, m_decoder->get_input_debug_name(index)); (*m_tensor_map)[input_id] = ov_output; - m_mutated_tensors.insert(input_id); + m_mutated_tensors->insert(input_id); } -void NodeContext::add_tensor_to_context(size_t index, Output ov_output) { +void NodeContext::add_tensor_to_context(size_t index, Output ov_output) const { if (m_tensor_map->count(index)) { OPENVINO_DEBUG << "[ WARNING ] Current context has tensor. 
Rewriting.\n"; } @@ -59,7 +59,7 @@ void NodeContext::add_tensor_to_context(size_t index, Output ov_output) { (*m_tensor_map)[index] = ov_output; } -Output NodeContext::get_tensor_from_model_or_create_input(size_t index) { +Output NodeContext::get_tensor_from_model_or_create_input(size_t index) const { if (m_tensor_map->find(index) != m_tensor_map->end()) { return m_tensor_map->at(index); } else { @@ -87,7 +87,7 @@ Output NodeContext::get_input_from_visible_context(size_t index) const { return input_tensor; } -std::shared_ptr NodeContext::convert_subgraph(size_t index) { +std::shared_ptr NodeContext::convert_subgraph(size_t index) const { auto subgraph_decoder = m_decoder->get_subgraph_decoder(index); // Extend external context with internal tensors except Parameter nodes, because internal Parameters are created to diff --git a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp index 75d000ce1d7da8..42aa3da1d8586b 100644 --- a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp +++ b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp @@ -19,7 +19,7 @@ namespace op { using namespace ov::op; -OutputVector translate_adaptive_avg_pool3d(NodeContext& context) { +OutputVector translate_adaptive_avg_pool3d(const NodeContext& context) { num_inputs_check(context, 2, 2); auto const_tile_params = context.mark_node(v0::Constant::create(element::i32, Shape{5}, {1, 1, 1, 1, 1})); auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0})); diff --git a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp index bbd72927fc6fac..5705fd22e70f47 100644 --- a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp +++ b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_adaptive_max_pool2d(NodeContext& context) { +OutputVector translate_adaptive_max_pool2d(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp index 3c4976efe2cdef..65ecfe021c5b7e 100644 --- a/src/frontends/pytorch/src/op/add.cpp +++ b/src/frontends/pytorch/src/op/add.cpp @@ -15,7 +15,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_add(NodeContext& context) { +OutputVector translate_add(const NodeContext& context) { num_inputs_check(context, 2, 3); auto lhs = context.get_input(0); auto rhs = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/addcmul.cpp b/src/frontends/pytorch/src/op/addcmul.cpp index 50d2cec4a3d045..7933acfde1d176 100644 --- a/src/frontends/pytorch/src/op/addcmul.cpp +++ b/src/frontends/pytorch/src/op/addcmul.cpp @@ -17,7 +17,7 @@ namespace op { using namespace ov::op; -OutputVector translate_addcmul(NodeContext& context) { +OutputVector translate_addcmul(const NodeContext& context) { num_inputs_check(context, 4, 4); const auto eltwise_mult = std::make_shared(context.get_input(1), context.get_input(2)); const auto value = context.get_input(3); diff --git a/src/frontends/pytorch/src/op/addmm.cpp b/src/frontends/pytorch/src/op/addmm.cpp index 4a2e16906b2b09..e8ba9b7e9159ab 100644 --- a/src/frontends/pytorch/src/op/addmm.cpp +++ b/src/frontends/pytorch/src/op/addmm.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_addmm(NodeContext& context) { +OutputVector 
translate_addmm(const NodeContext& context) { num_inputs_check(context, 5, 5); auto input = context.get_input(0); auto m1 = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/arange.cpp b/src/frontends/pytorch/src/op/arange.cpp index bacd8d113bc3c3..c238cd12bbc394 100644 --- a/src/frontends/pytorch/src/op/arange.cpp +++ b/src/frontends/pytorch/src/op/arange.cpp @@ -17,7 +17,7 @@ namespace op { using namespace ov::op; -OutputVector translate_arange(NodeContext& context) { +OutputVector translate_arange(const NodeContext& context) { auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); int dtype_port = -1; diff --git a/src/frontends/pytorch/src/op/as_tensor.cpp b/src/frontends/pytorch/src/op/as_tensor.cpp index 114f4e6fe6ad34..ae2c15d0a1eba9 100644 --- a/src/frontends/pytorch/src/op/as_tensor.cpp +++ b/src/frontends/pytorch/src/op/as_tensor.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_as_tensor(NodeContext& context) { +OutputVector translate_as_tensor(const NodeContext& context) { // aten::tensor(t[] data, *, ScalarType? dtype=None, Device? device=None, bool requires_grad=False) -> Tensor num_inputs_check(context, 1, 4); auto dtype = element::f32; diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp index bb1d16b99df2cb..77f35a0569e76b 100644 --- a/src/frontends/pytorch/src/op/avg_poolnd.cpp +++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp @@ -18,7 +18,7 @@ namespace op { using namespace ov::op; -OutputVector translate_avg_poolnd(NodeContext& context) { +OutputVector translate_avg_poolnd(const NodeContext& context) { num_inputs_check(context, 6, 7); auto input = context.get_input(0); auto kernel = context.const_input(1); diff --git a/src/frontends/pytorch/src/op/batch_norm.cpp b/src/frontends/pytorch/src/op/batch_norm.cpp index 495295e212e213..a306dd21832e82 100644 --- a/src/frontends/pytorch/src/op/batch_norm.cpp +++ b/src/frontends/pytorch/src/op/batch_norm.cpp @@ -32,7 +32,7 @@ Output broadcast_const_to_channel_dim(const NodeContext& context, } } // namespace -OutputVector translate_batch_norm(NodeContext& context) { +OutputVector translate_batch_norm(const NodeContext& context) { // Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? 
 running_var, // bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
     num_inputs_check(context, 8, 9);
diff --git a/src/frontends/pytorch/src/op/bitwise_not.cpp b/src/frontends/pytorch/src/op/bitwise_not.cpp
new file mode 100644
index 00000000000000..55ba1203b80a04
--- /dev/null
+++ b/src/frontends/pytorch/src/op/bitwise_not.cpp
@@ -0,0 +1,29 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/frontend/pytorch/node_context.hpp"
+#include "openvino/op/logical_not.hpp"
+#include "utils.hpp"
+
+namespace ov {
+namespace frontend {
+namespace pytorch {
+namespace op {
+
+OutputVector translate_bitwise_not(const NodeContext& context) {
+    num_inputs_check(context, 1, 2);
+    auto x = context.get_input(0);
+    FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
+                                  "aten::bitwise_not supported only for boolean input");
+    auto not_x = context.mark_node(std::make_shared(x));
+    if (!context.input_is_none(1)) {
+        context.mutate_input(1, not_x);
+    }
+    return {not_x};
+};
+
+} // namespace op
+} // namespace pytorch
+} // namespace frontend
+} // namespace ov
\ No newline at end of file
diff --git a/src/frontends/pytorch/src/op/bool.cpp b/src/frontends/pytorch/src/op/bool.cpp
index 60d7a4e0158631..0d0d53ad608fb1 100644
--- a/src/frontends/pytorch/src/op/bool.cpp
+++ b/src/frontends/pytorch/src/op/bool.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_bool(NodeContext& context) {
+OutputVector translate_bool(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {context.mark_node(std::make_shared(context.get_input(0), element::boolean))};
 };
diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp
index ce359c6384885f..8fbdd0f3e6f32f 100644
--- a/src/frontends/pytorch/src/op/cat.cpp
+++ b/src/frontends/pytorch/src/op/cat.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_cat(NodeContext& context) {
+OutputVector translate_cat(const NodeContext& context) {
     // This translator is only needed to get axis as constant from external scope
     num_inputs_check(context, 2, 2);
     const auto&& list_elems = get_list_as_outputs(context.get_input(0));
diff --git a/src/frontends/pytorch/src/op/clamp.cpp b/src/frontends/pytorch/src/op/clamp.cpp
index afbe349cf4a25f..fa28ca301df48a 100644
--- a/src/frontends/pytorch/src/op/clamp.cpp
+++ b/src/frontends/pytorch/src/op/clamp.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_clamp(NodeContext& context) {
+OutputVector translate_clamp(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     auto x = context.get_input(0);
     if (!context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/constant.cpp b/src/frontends/pytorch/src/op/constant.cpp
index 944a1e197867dc..6fc6444f04fa35 100644
--- a/src/frontends/pytorch/src/op/constant.cpp
+++ b/src/frontends/pytorch/src/op/constant.cpp
@@ -9,7 +9,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_constant(NodeContext& context) {
+OutputVector translate_constant(const NodeContext& context) {
     return context.as_constant();
 };
 
diff --git a/src/frontends/pytorch/src/op/conv_transposend.cpp b/src/frontends/pytorch/src/op/conv_transposend.cpp
index d4a0cc2e047114..1f281f90486fad 100644
--- a/src/frontends/pytorch/src/op/conv_transposend.cpp
+++ b/src/frontends/pytorch/src/op/conv_transposend.cpp
@@ -15,7 +15,7 
@@ namespace op { using namespace ov::op; -OutputVector translate_conv_transposend(NodeContext& context) { +OutputVector translate_conv_transposend(const NodeContext& context) { num_inputs_check(context, 8, 8); auto strides = context.const_input(3); // PyTorch support only symmetric padding, padding sizes are the same for begins and ends for each dimension diff --git a/src/frontends/pytorch/src/op/convnd.cpp b/src/frontends/pytorch/src/op/convnd.cpp index 9b91985182479a..861cb68f1fa30f 100644 --- a/src/frontends/pytorch/src/op/convnd.cpp +++ b/src/frontends/pytorch/src/op/convnd.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_convnd(NodeContext& context) { +OutputVector translate_convnd(const NodeContext& context) { num_inputs_check(context, 7, 7); auto strides = context.const_input(3); // In torch pads at beginning are same as at end diff --git a/src/frontends/pytorch/src/op/convolution.cpp b/src/frontends/pytorch/src/op/convolution.cpp index d0e77e4adadafe..7a250115b54a51 100644 --- a/src/frontends/pytorch/src/op/convolution.cpp +++ b/src/frontends/pytorch/src/op/convolution.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_convolution(NodeContext& context) { +OutputVector translate_convolution(const NodeContext& context) { // Schema: aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] // dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool // cudnn_enabled, bool allow_tf32) -> Tensor diff --git a/src/frontends/pytorch/src/op/convolution_mode.cpp b/src/frontends/pytorch/src/op/convolution_mode.cpp index c732aa6688941f..7aa9d8f991b97b 100644 --- a/src/frontends/pytorch/src/op/convolution_mode.cpp +++ b/src/frontends/pytorch/src/op/convolution_mode.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_convolution_mode(NodeContext& context) { +OutputVector translate_convolution_mode(const NodeContext& context) { // Schema: aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] // dilation, int groups) -> Tensor num_inputs_check(context, 7, 7); diff --git a/src/frontends/pytorch/src/op/cumsum.cpp b/src/frontends/pytorch/src/op/cumsum.cpp index 98cef936de88be..c396521a9e402b 100644 --- a/src/frontends/pytorch/src/op/cumsum.cpp +++ b/src/frontends/pytorch/src/op/cumsum.cpp @@ -3,11 +3,7 @@ // #include "openvino/frontend/pytorch/node_context.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/convert.hpp" -#include "openvino/op/convert_like.hpp" #include "openvino/op/cum_sum.hpp" -#include "pt_framework_node.hpp" #include "utils.hpp" namespace ov { @@ -17,21 +13,13 @@ namespace op { using namespace ov::op; -OutputVector translate_cumsum(NodeContext& context) { +OutputVector translate_cumsum(const NodeContext& context) { // aten::cumsum(Tensor self, int dim, *, ScalarType? 
dtype=None, Tensor out=None) num_inputs_check(context, 2, 4); auto x = context.get_input(0); auto dim = context.get_input(1); if (!context.input_is_none(2)) { - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(2).get_node_shared_ptr())) { - auto dtype = convert_dtype(context.const_input(2)); - x = context.mark_node(std::make_shared(x, dtype)); - } else if (const auto& fw_node = cast_fw_node(context.get_input(2).get_node_shared_ptr(), "prim::dtype")) { - auto out_tensor = fw_node->input_value(0); - x = context.mark_node(std::make_shared(x, out_tensor)); - } else { - FRONT_END_OP_CONVERSION_CHECK(false, "Couldn't get dtype input"); - } + x = apply_dtype(context, 2, x); } auto result = context.mark_node(std::make_shared(x, dim)); if (!context.input_is_none(3)) { diff --git a/src/frontends/pytorch/src/op/dim.cpp b/src/frontends/pytorch/src/op/dim.cpp index 7af4aa8fe3147e..2d69cb3e37a796 100644 --- a/src/frontends/pytorch/src/op/dim.cpp +++ b/src/frontends/pytorch/src/op/dim.cpp @@ -12,7 +12,7 @@ namespace op { using namespace ov::op; -OutputVector translate_dim(NodeContext& context) { +OutputVector translate_dim(const NodeContext& context) { num_inputs_check(context, 1, 1); Output rank; std::tie(std::ignore, rank) = get_shape_rank(context, context.get_input(0), true); diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp index 54cab6325ae7f2..e9dd7136e35bef 100644 --- a/src/frontends/pytorch/src/op/div.cpp +++ b/src/frontends/pytorch/src/op/div.cpp @@ -17,7 +17,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_div(NodeContext& context) { +OutputVector translate_div(const NodeContext& context) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/elu.cpp b/src/frontends/pytorch/src/op/elu.cpp index f60d76b96638e5..4f96371ee83ebd 100644 --- a/src/frontends/pytorch/src/op/elu.cpp +++ b/src/frontends/pytorch/src/op/elu.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_elu(NodeContext& context) { +OutputVector translate_elu(const NodeContext& context) { // aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor num_inputs_check(context, 2, 4); auto x = context.get_input(0); diff --git a/src/frontends/pytorch/src/op/embedding.cpp b/src/frontends/pytorch/src/op/embedding.cpp index c920992bdaafec..e5dc85a0ddfebf 100644 --- a/src/frontends/pytorch/src/op/embedding.cpp +++ b/src/frontends/pytorch/src/op/embedding.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_embedding(NodeContext& context) { +OutputVector translate_embedding(const NodeContext& context) { // aten::embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool // sparse=False) num_inputs_check(context, 5, 5); diff --git a/src/frontends/pytorch/src/op/expand.cpp b/src/frontends/pytorch/src/op/expand.cpp index 34f0a9d70c323b..9210cedc6eba6f 100644 --- a/src/frontends/pytorch/src/op/expand.cpp +++ b/src/frontends/pytorch/src/op/expand.cpp @@ -30,7 +30,7 @@ OutputVector base_expand(const NodeContext& context, const Output& x, cons }; } // namespace -OutputVector translate_expand(NodeContext& context) { +OutputVector translate_expand(const NodeContext& context) { // aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a) num_inputs_check(context, 2, 3); auto x = 
context.get_input(0); @@ -41,7 +41,7 @@ OutputVector translate_expand(NodeContext& context) { return base_expand(context, x, sizes); }; -OutputVector translate_expand_as(NodeContext& context) { +OutputVector translate_expand_as(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/eye.cpp b/src/frontends/pytorch/src/op/eye.cpp index ab35c56569c5c4..9b7f7ef8c3bc29 100644 --- a/src/frontends/pytorch/src/op/eye.cpp +++ b/src/frontends/pytorch/src/op/eye.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_eye(NodeContext& context) { +OutputVector translate_eye(const NodeContext& context) { size_t num_inputs = context.get_input_size(); auto x = context.get_input(0); // num rows and cols should be integer, but at the moment conversion their data type can be unknown yet diff --git a/src/frontends/pytorch/src/op/flatten.cpp b/src/frontends/pytorch/src/op/flatten.cpp index 6d9005a64b8643..6022661c3aa8cf 100644 --- a/src/frontends/pytorch/src/op/flatten.cpp +++ b/src/frontends/pytorch/src/op/flatten.cpp @@ -18,7 +18,7 @@ namespace op { using namespace ov::op; -OutputVector translate_flatten(NodeContext& context) { +OutputVector translate_flatten(const NodeContext& context) { num_inputs_check(context, 1, 3); auto x = context.get_input(0); int64_t start_dim = 0; diff --git a/src/frontends/pytorch/src/op/floor_divide.cpp b/src/frontends/pytorch/src/op/floor_divide.cpp index 77caa98e469090..4fb1b230d44c21 100644 --- a/src/frontends/pytorch/src/op/floor_divide.cpp +++ b/src/frontends/pytorch/src/op/floor_divide.cpp @@ -14,10 +14,11 @@ namespace op { using namespace ov::op; -OutputVector translate_floor_divide(NodeContext& context) { +OutputVector translate_floor_divide(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto y = context.get_input(1); + align_eltwise_input_types(context, x, y, true); auto div = context.mark_node(std::make_shared(x, y, true)); return {context.mark_node(std::make_shared(div))}; }; diff --git a/src/frontends/pytorch/src/op/floordiv.cpp b/src/frontends/pytorch/src/op/floordiv.cpp index 48360a4a0ced0d..91c03e74d7f0b1 100644 --- a/src/frontends/pytorch/src/op/floordiv.cpp +++ b/src/frontends/pytorch/src/op/floordiv.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_floordiv(NodeContext& context) { +OutputVector translate_floordiv(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto y = context.get_input(1); @@ -21,4 +21,4 @@ OutputVector translate_floordiv(NodeContext& context) { } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/full.cpp b/src/frontends/pytorch/src/op/full.cpp index d74cc707af6b67..bbb7f98022f42b 100644 --- a/src/frontends/pytorch/src/op/full.cpp +++ b/src/frontends/pytorch/src/op/full.cpp @@ -5,7 +5,6 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/constant.hpp" -#include "openvino/op/convert.hpp" #include "openvino/op/convert_like.hpp" #include "openvino/op/shape_of.hpp" #include "utils.hpp" @@ -22,28 +21,28 @@ Output base_translate_full(const NodeContext& context, const Output& return context.mark_node(std::make_shared(value, sizes)); } +Output base_translate_full_with_convertlike(const 
NodeContext& context, + const Output& sizes, + const Output& value, + const Output& out) { + auto filled_tensor = base_translate_full(context, sizes, value); + return context.mark_node(std::make_shared(filled_tensor, out)); +} + Output base_translate_full_with_convert(const NodeContext& context, const Output& sizes, - const Output& value, + Output value, size_t dtype_id) { - auto filled_tensor = base_translate_full(context, sizes, value); if (!context.input_is_none(dtype_id)) { - auto dtype = convert_dtype(context.const_input(dtype_id)); - filled_tensor = context.mark_node(std::make_shared(filled_tensor, dtype)); + value = apply_dtype(context, dtype_id, value); } - return filled_tensor; -} -Output base_translate_full_with_convertlike(const NodeContext& context, - const Output& sizes, - const Output& value, - const Output& out) { auto filled_tensor = base_translate_full(context, sizes, value); - return context.mark_node(std::make_shared(filled_tensor, out)); + return filled_tensor; } } // namespace -OutputVector translate_full(NodeContext& context) { +OutputVector translate_full(const NodeContext& context) { num_inputs_check(context, 2, 6); auto sizes = context.get_input(0); auto value = context.get_input(1); @@ -60,19 +59,19 @@ OutputVector translate_full(NodeContext& context) { return {base_translate_full_with_convert(context, sizes, value, dtype_id)}; }; -OutputVector translate_full_like(NodeContext& context) { +OutputVector translate_full_like(const NodeContext& context) { num_inputs_check(context, 2, 7); auto input = context.get_input(0); auto value = context.get_input(1); auto sizes = context.mark_node(std::make_shared(input, element::i32)); - if (context.get_input_size() == 7) { + if (context.get_input_size() == 7 && !context.input_is_none(2)) { return {base_translate_full_with_convert(context, sizes, value, 2)}; } auto out = context.input_is_none(3) ? 
input : context.get_input(3); return {base_translate_full_with_convertlike(context, sizes, value, out)}; }; -OutputVector translate_fill_(NodeContext& context) { +OutputVector translate_fill_(const NodeContext& context) { num_inputs_check(context, 2, 2); auto input = context.get_input(0); auto value = context.get_input(1); @@ -80,7 +79,7 @@ OutputVector translate_fill_(NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, input)}; }; -OutputVector translate_new_full(NodeContext& context) { +OutputVector translate_new_full(const NodeContext& context) { num_inputs_check(context, 3, 7); auto input = context.get_input(0); auto sizes = context.get_input(1); @@ -91,7 +90,7 @@ OutputVector translate_new_full(NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, input)}; }; -OutputVector translate_zeros(NodeContext& context) { +OutputVector translate_zeros(const NodeContext& context) { num_inputs_check(context, 2, 5); auto sizes = context.get_input(0); auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0})); @@ -108,19 +107,19 @@ OutputVector translate_zeros(NodeContext& context) { return {base_translate_full_with_convert(context, sizes, value, dtype_id)}; }; -OutputVector translate_zeros_like(NodeContext& context) { +OutputVector translate_zeros_like(const NodeContext& context) { num_inputs_check(context, 1, 6); auto input = context.get_input(0); auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0})); auto sizes = context.mark_node(std::make_shared(input, element::i32)); - if (context.get_input_size() == 6) { + if (context.get_input_size() == 6 && !context.input_is_none(1)) { return {base_translate_full_with_convert(context, sizes, value, 1)}; } auto out = context.input_is_none(2) ? input : context.get_input(2); return {base_translate_full_with_convertlike(context, sizes, value, out)}; }; -OutputVector translate_new_zeros(NodeContext& context) { +OutputVector translate_new_zeros(const NodeContext& context) { num_inputs_check(context, 2, 6); auto input = context.get_input(0); auto sizes = context.get_input(1); @@ -131,7 +130,7 @@ OutputVector translate_new_zeros(NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, input)}; }; -OutputVector translate_ones(NodeContext& context) { +OutputVector translate_ones(const NodeContext& context) { num_inputs_check(context, 1, 5); auto sizes = context.get_input(0); auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1})); @@ -148,19 +147,19 @@ OutputVector translate_ones(NodeContext& context) { return {base_translate_full_with_convert(context, sizes, value, dtype_id)}; }; -OutputVector translate_ones_like(NodeContext& context) { +OutputVector translate_ones_like(const NodeContext& context) { num_inputs_check(context, 1, 6); auto input = context.get_input(0); auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1})); auto sizes = context.mark_node(std::make_shared(input, element::i32)); - if (context.get_input_size() == 6) { + if (context.get_input_size() == 6 && !context.input_is_none(1)) { return {base_translate_full_with_convert(context, sizes, value, 1)}; } auto out = context.input_is_none(2) ? 
input : context.get_input(2); return {base_translate_full_with_convertlike(context, sizes, value, out)}; }; -OutputVector translate_new_ones(NodeContext& context) { +OutputVector translate_new_ones(const NodeContext& context) { num_inputs_check(context, 2, 6); auto input = context.get_input(0); auto sizes = context.get_input(1); @@ -171,8 +170,11 @@ OutputVector translate_new_ones(NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, input)}; }; -OutputVector translate_empty(NodeContext& context) { - num_inputs_check(context, 1, 2); +OutputVector translate_empty(const NodeContext& context) { + // aten::empty(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? + // pin_memory=None, MemoryFormat? memory_format=None) -> Tensor layout, device and work with memory ignored on our + // side, so just skip these parameters + num_inputs_check(context, 1, 6); auto sizes = context.get_input(0); // In OV uninitialised data is not supported, so we create a tensor filled with zeros with a given shape and type. auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0})); @@ -185,8 +187,7 @@ OutputVector translate_empty(NodeContext& context) { } return {empty}; }; - } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/gelu.cpp b/src/frontends/pytorch/src/op/gelu.cpp index 598f6865b3e2b0..c64ea647688584 100644 --- a/src/frontends/pytorch/src/op/gelu.cpp +++ b/src/frontends/pytorch/src/op/gelu.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_gelu(NodeContext& context) { +OutputVector translate_gelu(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto approximate = context.const_input(1); diff --git a/src/frontends/pytorch/src/op/get_attr.cpp b/src/frontends/pytorch/src/op/get_attr.cpp index 3575a5210a8518..1d0ae0e4d13d1b 100644 --- a/src/frontends/pytorch/src/op/get_attr.cpp +++ b/src/frontends/pytorch/src/op/get_attr.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_get_attr(NodeContext& context) { +OutputVector translate_get_attr(const NodeContext& context) { auto res = context.get_decoder()->try_decode_get_attr(); FRONT_END_OP_CONVERSION_CHECK(res.size() > 0, "GetAttr must have at least one output."); return res; diff --git a/src/frontends/pytorch/src/op/getitem.cpp b/src/frontends/pytorch/src/op/getitem.cpp index 1bf9f4a0e8a274..0a1243196f4d6c 100644 --- a/src/frontends/pytorch/src/op/getitem.cpp +++ b/src/frontends/pytorch/src/op/getitem.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_getitem(NodeContext& context) { +OutputVector translate_getitem(const NodeContext& context) { num_inputs_check(context, 2, 2); auto input = context.get_input(0); if (std::dynamic_pointer_cast(input.get_node_shared_ptr())) { diff --git a/src/frontends/pytorch/src/op/glu.cpp b/src/frontends/pytorch/src/op/glu.cpp index e650e9c4a54c05..dbe979fb1f2870 100644 --- a/src/frontends/pytorch/src/op/glu.cpp +++ b/src/frontends/pytorch/src/op/glu.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_glu(NodeContext& context) { +OutputVector translate_glu(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto dim = context.input_is_none(1) ? 
context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1})) diff --git a/src/frontends/pytorch/src/op/grid_sampler.cpp b/src/frontends/pytorch/src/op/grid_sampler.cpp index 9011abd8566a67..8c603813d888f7 100644 --- a/src/frontends/pytorch/src/op/grid_sampler.cpp +++ b/src/frontends/pytorch/src/op/grid_sampler.cpp @@ -13,7 +13,7 @@ namespace op { using namespace ov::op; -OutputVector translate_grid_sampler(NodeContext& context) { +OutputVector translate_grid_sampler(const NodeContext& context) { num_inputs_check(context, 4, 5); auto x = context.get_input(0); auto grid = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/group_norm.cpp b/src/frontends/pytorch/src/op/group_norm.cpp index 7b3ac53bbb4772..6ce36aac6601b1 100644 --- a/src/frontends/pytorch/src/op/group_norm.cpp +++ b/src/frontends/pytorch/src/op/group_norm.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_group_norm(NodeContext& context) { +OutputVector translate_group_norm(const NodeContext& context) { // aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float // eps=1.0000000000000001e-05, bool cudnn_enabled=True) -> Tensor num_inputs_check(context, 2, 6); diff --git a/src/frontends/pytorch/src/op/hardtanh.cpp b/src/frontends/pytorch/src/op/hardtanh.cpp index 52551a259a97d2..a85bedbf00626b 100644 --- a/src/frontends/pytorch/src/op/hardtanh.cpp +++ b/src/frontends/pytorch/src/op/hardtanh.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_hardtanh(NodeContext& context) { +OutputVector translate_hardtanh(const NodeContext& context) { num_inputs_check(context, 1, 3); float min = -1; float max = 1; diff --git a/src/frontends/pytorch/src/op/if.cpp b/src/frontends/pytorch/src/op/if.cpp index 1e5d3a26778cc1..7fb3ecce123a26 100644 --- a/src/frontends/pytorch/src/op/if.cpp +++ b/src/frontends/pytorch/src/op/if.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_if(NodeContext& context) { +OutputVector translate_if(const NodeContext& context) { auto if_node = std::make_shared(context.get_input(0)); context.mark_node(if_node); auto decoder = context.get_decoder(); diff --git a/src/frontends/pytorch/src/op/im2col.cpp b/src/frontends/pytorch/src/op/im2col.cpp index 12fb4f3b7c4a04..718e0eadaa4ca0 100644 --- a/src/frontends/pytorch/src/op/im2col.cpp +++ b/src/frontends/pytorch/src/op/im2col.cpp @@ -56,7 +56,7 @@ std::shared_ptr get_im2col_indices_along_dim(const NodeContext& context, } } // namespace -OutputVector translate_im2col(NodeContext& context) { +OutputVector translate_im2col(const NodeContext& context) { num_inputs_check(context, 5, 5); auto input = context.get_input(0); auto kernel_size = context.const_input>(1); diff --git a/src/frontends/pytorch/src/op/index_put_.cpp b/src/frontends/pytorch/src/op/index_put_.cpp index 1ce4ea0e96d6a1..d8a599eaf5abcf 100644 --- a/src/frontends/pytorch/src/op/index_put_.cpp +++ b/src/frontends/pytorch/src/op/index_put_.cpp @@ -10,9 +10,7 @@ namespace frontend { namespace pytorch { namespace op { -using namespace ov::op; - -OutputVector translate_index_put_(NodeContext& context) { +OutputVector translate_index_put_(const NodeContext& context) { // Pass as PtFrameworkNode to register as `inplace_op`. Conversion to OV operators is done as transformation. 
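// Illustrative sketch, not part of this hunk: the `inplace_op` wrapper mentioned in the comment
// above (declared in utils.hpp later in this patch) is expected to run the wrapped translator and
// feed its single result back into the mutated input; the exact template parameters and the
// mutate_input call are assumptions here.
template <OutputVector (*T)(const NodeContext&), size_t idx = 0>
OutputVector inplace_op(const NodeContext& context) {
    auto res = T(context);
    FRONT_END_OP_CONVERSION_CHECK(res.size() == 1,
                                  "inplace_op function must be used on single output translators");
    // Record the in-place update so the translate session re-wires consumers of input `idx`.
    context.mutate_input(idx, res[0]);
    return res;
}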
auto node = std::make_shared(context.get_decoder(), context.inputs()); return {context.mark_node(node)}; diff --git a/src/frontends/pytorch/src/op/instance_norm.cpp b/src/frontends/pytorch/src/op/instance_norm.cpp index b00b7bff260bd1..ff53bd11e92ffb 100644 --- a/src/frontends/pytorch/src/op/instance_norm.cpp +++ b/src/frontends/pytorch/src/op/instance_norm.cpp @@ -88,7 +88,7 @@ OutputVector translate_instance_norm_train(const NodeContext& context, } // namespace -OutputVector translate_instance_norm(NodeContext& context) { +OutputVector translate_instance_norm(const NodeContext& context) { num_inputs_check(context, 8, 9); auto input = context.get_input(0); auto eps = context.const_input(7); diff --git a/src/frontends/pytorch/src/op/int.cpp b/src/frontends/pytorch/src/op/int.cpp index e11397cb6f11e3..5a407a1a7254c3 100644 --- a/src/frontends/pytorch/src/op/int.cpp +++ b/src/frontends/pytorch/src/op/int.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_int(NodeContext& context) { +OutputVector translate_int(const NodeContext& context) { num_inputs_check(context, 1, 1); return {context.mark_node(std::make_shared(context.get_input(0), element::i32))}; }; diff --git a/src/frontends/pytorch/src/op/layer_norm.cpp b/src/frontends/pytorch/src/op/layer_norm.cpp index c954110111e799..204d7164531c72 100644 --- a/src/frontends/pytorch/src/op/layer_norm.cpp +++ b/src/frontends/pytorch/src/op/layer_norm.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_layer_norm(NodeContext& context) { +OutputVector translate_layer_norm(const NodeContext& context) { num_inputs_check(context, 5, 6); auto eps = context.const_input(4); auto normalized_shape = context.const_input(1); diff --git a/src/frontends/pytorch/src/op/len.cpp b/src/frontends/pytorch/src/op/len.cpp index 71f8bdf3a32b88..9a22658500913f 100644 --- a/src/frontends/pytorch/src/op/len.cpp +++ b/src/frontends/pytorch/src/op/len.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_len(NodeContext& context) { +OutputVector translate_len(const NodeContext& context) { num_inputs_check(context, 1, 1); auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0})); auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); diff --git a/src/frontends/pytorch/src/op/linear.cpp b/src/frontends/pytorch/src/op/linear.cpp index e94ff7c9168003..8288220f320251 100644 --- a/src/frontends/pytorch/src/op/linear.cpp +++ b/src/frontends/pytorch/src/op/linear.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_linear(NodeContext& context) { +OutputVector translate_linear(const NodeContext& context) { // schema: aten::linear(Tensor input, Tensor weight, Tensor? 
bias=None) -> Tensor num_inputs_check(context, 2, 3); auto x = context.get_input(0); diff --git a/src/frontends/pytorch/src/op/list_construct.cpp b/src/frontends/pytorch/src/op/list_construct.cpp index e69188e23d89a3..e58a3c4744ff61 100644 --- a/src/frontends/pytorch/src/op/list_construct.cpp +++ b/src/frontends/pytorch/src/op/list_construct.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_list_construct(NodeContext& context) { +OutputVector translate_list_construct(const NodeContext& context) { // Process the case when prim::ListConstruct has all inputs constant auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); ov::OutputVector consts; diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp index 85947b7694ee06..808dff6ed32822 100644 --- a/src/frontends/pytorch/src/op/log.cpp +++ b/src/frontends/pytorch/src/op/log.cpp @@ -17,7 +17,7 @@ namespace op { using namespace ov::op; -OutputVector translate_log(NodeContext& context) { +OutputVector translate_log(const NodeContext& context) { // torch.log returns a tensor with the natural logarithm of the elements of input. num_inputs_check(context, 1, 1); auto x = context.get_input(0); @@ -26,7 +26,7 @@ OutputVector translate_log(NodeContext& context) { return {log}; }; -OutputVector translate_log2(NodeContext& context) { +OutputVector translate_log2(const NodeContext& context) { // torch.log2 returns a tensor with the logarithm to the base 2 of the elements of input. num_inputs_check(context, 1, 1); auto x = context.get_input(0); diff --git a/src/frontends/pytorch/src/op/loop.cpp b/src/frontends/pytorch/src/op/loop.cpp index 7bf03cfcd30138..36369ea63bd4ee 100644 --- a/src/frontends/pytorch/src/op/loop.cpp +++ b/src/frontends/pytorch/src/op/loop.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_loop(NodeContext& context) { +OutputVector translate_loop(const NodeContext& context) { const auto& inputs = context.inputs(); FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= 2, "Loop must have at least 2 inputs."); auto loop = std::make_shared(inputs[0], inputs[1]); diff --git a/src/frontends/pytorch/src/op/masked_fill.cpp b/src/frontends/pytorch/src/op/masked_fill.cpp index 2a071755b3a145..5ed090e0b619b7 100644 --- a/src/frontends/pytorch/src/op/masked_fill.cpp +++ b/src/frontends/pytorch/src/op/masked_fill.cpp @@ -18,7 +18,7 @@ namespace op { using namespace ov::op; -OutputVector translate_masked_fill(NodeContext& context) { +OutputVector translate_masked_fill(const NodeContext& context) { num_inputs_check(context, 3, 3); auto data = context.get_input(0); auto mask = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp index f594b0a2b0798c..f756b1488ce9ea 100644 --- a/src/frontends/pytorch/src/op/max_poolnd.cpp +++ b/src/frontends/pytorch/src/op/max_poolnd.cpp @@ -13,7 +13,7 @@ namespace op { using namespace ov::op; -OutputVector translate_max_poolnd(NodeContext& context) { +OutputVector translate_max_poolnd(const NodeContext& context) { num_inputs_check(context, 6, 6); auto kernel = context.const_input(1); auto strides = context.const_input(2); diff --git a/src/frontends/pytorch/src/op/mean.cpp b/src/frontends/pytorch/src/op/mean.cpp index b7a5acfb6fb22f..46c42f6be1a4b7 100644 --- a/src/frontends/pytorch/src/op/mean.cpp +++ b/src/frontends/pytorch/src/op/mean.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace 
op { -OutputVector translate_mean(NodeContext& context) { +OutputVector translate_mean(const NodeContext& context) { num_inputs_check(context, 3, 4); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/meshgrid.cpp b/src/frontends/pytorch/src/op/meshgrid.cpp index 841de80fcbf494..c9b5833ae68d60 100644 --- a/src/frontends/pytorch/src/op/meshgrid.cpp +++ b/src/frontends/pytorch/src/op/meshgrid.cpp @@ -10,7 +10,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_meshgrid(NodeContext& context) { +OutputVector translate_meshgrid(const NodeContext& context) { std::string indexing = "ij"; if (!context.input_is_none(1)) { indexing = context.const_input(1); diff --git a/src/frontends/pytorch/src/op/min_max.cpp b/src/frontends/pytorch/src/op/min_max.cpp index 34c70219f1137c..898403bf82b7cf 100644 --- a/src/frontends/pytorch/src/op/min_max.cpp +++ b/src/frontends/pytorch/src/op/min_max.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_max(NodeContext& context) { +OutputVector translate_max(const NodeContext& context) { // torch.max (same for torch.min) actually has two interfaces smashed together: // torch.max(x, dim, keepdim) and torch.max(x, y) num_inputs_check(context, 1, 3); @@ -49,7 +49,7 @@ OutputVector translate_max(NodeContext& context) { return {values, indicies}; }; -OutputVector translate_min(NodeContext& context) { +OutputVector translate_min(const NodeContext& context) { // torch.min (same for torch.max) actually has two interfaces smashed together: // torch.min(x, dim, keepdim) and torch.min(x, y) num_inputs_check(context, 1, 3); diff --git a/src/frontends/pytorch/src/op/narrow.cpp b/src/frontends/pytorch/src/op/narrow.cpp index a212b22503434e..ffae7d2ec55a03 100644 --- a/src/frontends/pytorch/src/op/narrow.cpp +++ b/src/frontends/pytorch/src/op/narrow.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_narrow(NodeContext& context) { +OutputVector translate_narrow(const NodeContext& context) { num_inputs_check(context, 4, 4); auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); diff --git a/src/frontends/pytorch/src/op/neg.cpp b/src/frontends/pytorch/src/op/neg.cpp index e902eb0f21fefb..423118c3b2fc24 100644 --- a/src/frontends/pytorch/src/op/neg.cpp +++ b/src/frontends/pytorch/src/op/neg.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_neg(NodeContext& context) { +OutputVector translate_neg(const NodeContext& context) { num_inputs_check(context, 1, 1); auto x = context.get_input(0); auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1})); diff --git a/src/frontends/pytorch/src/op/nms.cpp b/src/frontends/pytorch/src/op/nms.cpp index 2454d94a78e6a8..86ecb3df73cf87 100644 --- a/src/frontends/pytorch/src/op/nms.cpp +++ b/src/frontends/pytorch/src/op/nms.cpp @@ -18,7 +18,7 @@ namespace op { using namespace ov::op; -OutputVector translate_nms(NodeContext& context) { +OutputVector translate_nms(const NodeContext& context) { num_inputs_check(context, 3, 3); auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); diff --git a/src/frontends/pytorch/src/op/nonzero.cpp b/src/frontends/pytorch/src/op/nonzero.cpp index 80edef3f079b6b..29a6aa51175008 100644 --- a/src/frontends/pytorch/src/op/nonzero.cpp +++ 
b/src/frontends/pytorch/src/op/nonzero.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_nonzero(NodeContext& context) { +OutputVector translate_nonzero(const NodeContext& context) { num_inputs_check(context, 1, 1); auto cond = context.get_input(0); auto non_zero = context.mark_node(std::make_shared(cond)); diff --git a/src/frontends/pytorch/src/op/norm.cpp b/src/frontends/pytorch/src/op/norm.cpp index d35c0fec25867f..34a0bdd01c4cfa 100644 --- a/src/frontends/pytorch/src/op/norm.cpp +++ b/src/frontends/pytorch/src/op/norm.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_norm(NodeContext& context) { +OutputVector translate_norm(const NodeContext& context) { num_inputs_check(context, 4, 4); auto input_tensor = context.get_input(0); auto p = context.const_input(1); diff --git a/src/frontends/pytorch/src/op/numel.cpp b/src/frontends/pytorch/src/op/numel.cpp index 721ed7e173bdc6..a4d2a836c6a2bd 100644 --- a/src/frontends/pytorch/src/op/numel.cpp +++ b/src/frontends/pytorch/src/op/numel.cpp @@ -10,7 +10,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_numel(NodeContext& context) { +OutputVector translate_numel(const NodeContext& context) { num_inputs_check(context, 1, 1); return {numel(context, context.get_input(0))}; }; diff --git a/src/frontends/pytorch/src/op/pad.cpp b/src/frontends/pytorch/src/op/pad.cpp index 8a0568ece9cc3d..170544654542e5 100644 --- a/src/frontends/pytorch/src/op/pad.cpp +++ b/src/frontends/pytorch/src/op/pad.cpp @@ -22,7 +22,7 @@ namespace op { using namespace ov::op; -OutputVector translate_pad(NodeContext& context) { +OutputVector translate_pad(const NodeContext& context) { num_inputs_check(context, 2, 4); auto data = context.get_input(0); auto paddings = context.const_input>(1); diff --git a/src/frontends/pytorch/src/op/pow.cpp b/src/frontends/pytorch/src/op/pow.cpp index d418f3385340bf..d3a39694bf3953 100644 --- a/src/frontends/pytorch/src/op/pow.cpp +++ b/src/frontends/pytorch/src/op/pow.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_pow(NodeContext& context) { +OutputVector translate_pow(const NodeContext& context) { num_inputs_check(context, 2, 2); auto lhs = context.get_input(0); auto rhs = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/pythonop.cpp b/src/frontends/pytorch/src/op/pythonop.cpp index 36a4b388738c02..4aa142f04b58ed 100644 --- a/src/frontends/pytorch/src/op/pythonop.cpp +++ b/src/frontends/pytorch/src/op/pythonop.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_pythonop(NodeContext& context) { +OutputVector translate_pythonop(const NodeContext& context) { auto decoder = context.get_decoder(); FRONT_END_OP_CONVERSION_CHECK(decoder->get_subgraph_size() == 1, "PythonOp must have 1 subgraph to be able to translate it to OV."); diff --git a/src/frontends/pytorch/src/op/reciprocal.cpp b/src/frontends/pytorch/src/op/reciprocal.cpp index 67c5bdba78b39f..38b12fee06cb18 100644 --- a/src/frontends/pytorch/src/op/reciprocal.cpp +++ b/src/frontends/pytorch/src/op/reciprocal.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_reciprocal(NodeContext& context) { +OutputVector translate_reciprocal(const NodeContext& context) { num_inputs_check(context, 1, 1); auto x = context.get_input(0); auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1})); diff --git 
a/src/frontends/pytorch/src/op/relu6.cpp b/src/frontends/pytorch/src/op/relu6.cpp index 5dd5906061bc7f..08996811249dcc 100644 --- a/src/frontends/pytorch/src/op/relu6.cpp +++ b/src/frontends/pytorch/src/op/relu6.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_relu6(NodeContext& context) { +OutputVector translate_relu6(const NodeContext& context) { num_inputs_check(context, 1, 1); auto x = context.get_input(0); return {context.mark_node(std::make_shared(x, 0., 6.))}; diff --git a/src/frontends/pytorch/src/op/remainder.cpp b/src/frontends/pytorch/src/op/remainder.cpp index 55d33e00c3f321..622e20eba52fa4 100644 --- a/src/frontends/pytorch/src/op/remainder.cpp +++ b/src/frontends/pytorch/src/op/remainder.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_remainder(NodeContext& context) { +OutputVector translate_remainder(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/repeat.cpp b/src/frontends/pytorch/src/op/repeat.cpp index 574951aaf82c20..15dc03a466ec92 100644 --- a/src/frontends/pytorch/src/op/repeat.cpp +++ b/src/frontends/pytorch/src/op/repeat.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_repeat(NodeContext& context) { +OutputVector translate_repeat(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); auto repeats = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/repeat_interleave.cpp b/src/frontends/pytorch/src/op/repeat_interleave.cpp index 06d8333e04cceb..64971f6e3f28bc 100644 --- a/src/frontends/pytorch/src/op/repeat_interleave.cpp +++ b/src/frontends/pytorch/src/op/repeat_interleave.cpp @@ -34,7 +34,7 @@ OutputVector generate_indices_from_repeats_tensor(const NodeContext& context, co }; } // namespace -OutputVector translate_repeat_interleave(NodeContext& context) { +OutputVector translate_repeat_interleave(const NodeContext& context) { num_inputs_check(context, 2, 3); // constants auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); diff --git a/src/frontends/pytorch/src/op/reshape.cpp b/src/frontends/pytorch/src/op/reshape.cpp index b0d669e47be63b..c5c33f4f6e61da 100644 --- a/src/frontends/pytorch/src/op/reshape.cpp +++ b/src/frontends/pytorch/src/op/reshape.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_reshape(NodeContext& context) { +OutputVector translate_reshape(const NodeContext& context) { // Translation is used by both aten::view and aten::reshape. 
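// Illustrative note: get_supported_ops() in op_table.cpp (further down in this patch) presumably
// registers both schemas against this single translator, so the shape handling below is written
// once for both ops; the `Converter` alias and the entries shown are a simplified sketch.
using Converter = OutputVector (*)(const NodeContext&);
const std::map<std::string, Converter> reshape_like_entries = {
    {"aten::reshape", translate_reshape},
    {"aten::view", translate_reshape},
};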
// Schema: aten::view(Tensor input, int[] shape) -> Tensor // Schema: aten::reshape(Tensor input, int[] shape) -> Tensor diff --git a/src/frontends/pytorch/src/op/reshape_as.cpp b/src/frontends/pytorch/src/op/reshape_as.cpp index 63d18ee468f087..1c9be43d4a2d15 100644 --- a/src/frontends/pytorch/src/op/reshape_as.cpp +++ b/src/frontends/pytorch/src/op/reshape_as.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_reshape_as(NodeContext& context) { +OutputVector translate_reshape_as(const NodeContext& context) { num_inputs_check(context, 2, 2); auto input_tensor = context.get_input(0); auto shape_tesnor = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/roi_align.cpp b/src/frontends/pytorch/src/op/roi_align.cpp index d3a389c59654b9..fb2ad3a41d7455 100644 --- a/src/frontends/pytorch/src/op/roi_align.cpp +++ b/src/frontends/pytorch/src/op/roi_align.cpp @@ -19,7 +19,7 @@ namespace op { using namespace ov::op; -OutputVector translate_roi_align(NodeContext& context) { +OutputVector translate_roi_align(const NodeContext& context) { num_inputs_check(context, 7, 7); auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); diff --git a/src/frontends/pytorch/src/op/roll.cpp b/src/frontends/pytorch/src/op/roll.cpp index b0aef51a6872b9..9f358368fbce8c 100644 --- a/src/frontends/pytorch/src/op/roll.cpp +++ b/src/frontends/pytorch/src/op/roll.cpp @@ -17,7 +17,7 @@ namespace op { using namespace ov::op; -OutputVector translate_roll(NodeContext& context) { +OutputVector translate_roll(const NodeContext& context) { num_inputs_check(context, 3, 3); const auto data = context.get_input(0); const auto shifts = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/rsqrt.cpp b/src/frontends/pytorch/src/op/rsqrt.cpp index 9e9ba9330c87ca..d4f56040da27c5 100644 --- a/src/frontends/pytorch/src/op/rsqrt.cpp +++ b/src/frontends/pytorch/src/op/rsqrt.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_rsqrt(NodeContext& context) { +OutputVector translate_rsqrt(const NodeContext& context) { num_inputs_check(context, 1, 1); auto data = context.get_input(0); auto input_shape = context.mark_node(std::make_shared(data, element::i32)); diff --git a/src/frontends/pytorch/src/op/rsub.cpp b/src/frontends/pytorch/src/op/rsub.cpp index 21b109e9037182..200094b6eecede 100644 --- a/src/frontends/pytorch/src/op/rsub.cpp +++ b/src/frontends/pytorch/src/op/rsub.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_rsub(NodeContext& context) { +OutputVector translate_rsub(const NodeContext& context) { num_inputs_check(context, 3, 3); auto self = context.get_input(0); auto other = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/select.cpp b/src/frontends/pytorch/src/op/select.cpp index c6d7cb0048f325..ea5255f2410ffa 100644 --- a/src/frontends/pytorch/src/op/select.cpp +++ b/src/frontends/pytorch/src/op/select.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_select(NodeContext& context) { +OutputVector translate_select(const NodeContext& context) { num_inputs_check(context, 3, 3); auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); auto const_minus_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); diff --git a/src/frontends/pytorch/src/op/selu.cpp 
b/src/frontends/pytorch/src/op/selu.cpp index 9ec08af77facc8..aef54491e74bd1 100644 --- a/src/frontends/pytorch/src/op/selu.cpp +++ b/src/frontends/pytorch/src/op/selu.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_selu(NodeContext& context) { +OutputVector translate_selu(const NodeContext& context) { num_inputs_check(context, 1, 1); auto x = context.get_input(0); auto alpha = context.mark_node(v0::Constant::create(element::f64, Shape{}, {1.6732632423543772848170429916717})); diff --git a/src/frontends/pytorch/src/op/set_item.cpp b/src/frontends/pytorch/src/op/set_item.cpp index 9ce33fce24e8d2..ef11a2a391c39a 100644 --- a/src/frontends/pytorch/src/op/set_item.cpp +++ b/src/frontends/pytorch/src/op/set_item.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_set_item(NodeContext& context) { +OutputVector translate_set_item(const NodeContext& context) { // schema: aten::_set_item.t(t[](a!) l, int idx, t(b -> *) el) -> t[](a!) // _set_item inserts element in list num_inputs_check(context, 3, 3); diff --git a/src/frontends/pytorch/src/op/size.cpp b/src/frontends/pytorch/src/op/size.cpp index a4d70cef19ad2c..289facd0fe7f44 100644 --- a/src/frontends/pytorch/src/op/size.cpp +++ b/src/frontends/pytorch/src/op/size.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_size(NodeContext& context) { +OutputVector translate_size(const NodeContext& context) { num_inputs_check(context, 1, 2); auto shape = context.mark_node(std::make_shared(context.get_input(0), element::i32)); if (context.input_is_none(1)) { diff --git a/src/frontends/pytorch/src/op/slice.cpp b/src/frontends/pytorch/src/op/slice.cpp index 756efc1590b796..391b1c834fd535 100644 --- a/src/frontends/pytorch/src/op/slice.cpp +++ b/src/frontends/pytorch/src/op/slice.cpp @@ -18,7 +18,7 @@ namespace op { using namespace ov::op; -OutputVector translate_slice(NodeContext& context) { +OutputVector translate_slice(const NodeContext& context) { // aten::slice.t(t[] l, int? start=None, int? end=None, int step=1) -> (t[]) // aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=None, int? 
end=None, int step=1) -> (Tensor(a)) ov::Output dim; diff --git a/src/frontends/pytorch/src/op/softmax.cpp b/src/frontends/pytorch/src/op/softmax.cpp index b7d0d5fdc7ca3b..10c3afea7cda0b 100644 --- a/src/frontends/pytorch/src/op/softmax.cpp +++ b/src/frontends/pytorch/src/op/softmax.cpp @@ -12,11 +12,15 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_softmax(NodeContext& context) { - num_inputs_check(context, 2, 2); +using namespace ov::op; +OutputVector translate_softmax(const NodeContext& context) { + num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto axis = context.const_input(1); - return {context.mark_node(std::make_shared(x, axis))}; + if (!context.input_is_none(2)) { + x = apply_dtype(context, 2, x); + } + return {context.mark_node(std::make_shared(x, axis))}; }; } // namespace op diff --git a/src/frontends/pytorch/src/op/sort.cpp b/src/frontends/pytorch/src/op/sort.cpp index c0e54d54d9be2a..715a7a52d3efe8 100644 --- a/src/frontends/pytorch/src/op/sort.cpp +++ b/src/frontends/pytorch/src/op/sort.cpp @@ -9,7 +9,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_sort(NodeContext& context) { +OutputVector translate_sort(const NodeContext& context) { num_inputs_check(context, 3, 4); const auto input_tensor = context.get_input(0); bool stable, descending; @@ -40,7 +40,7 @@ OutputVector translate_sort(NodeContext& context) { return topk->outputs(); }; -OutputVector translate_argsort(NodeContext& context) { +OutputVector translate_argsort(const NodeContext& context) { auto sort = translate_sort(context); return {sort[1]}; }; diff --git a/src/frontends/pytorch/src/op/square.cpp b/src/frontends/pytorch/src/op/square.cpp index 7194aafd9abb36..2310fda75aa574 100644 --- a/src/frontends/pytorch/src/op/square.cpp +++ b/src/frontends/pytorch/src/op/square.cpp @@ -14,7 +14,7 @@ namespace op { using namespace ov::op; -OutputVector translate_square(NodeContext& context) { +OutputVector translate_square(const NodeContext& context) { num_inputs_check(context, 1, 1); auto input_0 = context.get_input(0); auto const_2 = context.mark_node(v0::Constant::create(input_0.get_element_type(), Shape{1}, {2})); diff --git a/src/frontends/pytorch/src/op/squeeze.cpp b/src/frontends/pytorch/src/op/squeeze.cpp index dacf2c55a4d596..fb15801367a564 100644 --- a/src/frontends/pytorch/src/op/squeeze.cpp +++ b/src/frontends/pytorch/src/op/squeeze.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_squeeze(NodeContext& context) { +OutputVector translate_squeeze(const NodeContext& context) { num_inputs_check(context, 1, 2); auto x = context.get_input(0); if (context.input_is_none(1)) { diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp index fd449c12bbd2d3..94963ed9bdb61f 100644 --- a/src/frontends/pytorch/src/op/sub.cpp +++ b/src/frontends/pytorch/src/op/sub.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_sub(NodeContext& context) { +OutputVector translate_sub(const NodeContext& context) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); diff --git a/src/frontends/pytorch/src/op/sum.cpp b/src/frontends/pytorch/src/op/sum.cpp index 3dc4601b1083a9..7a87dc0c507f6c 100644 --- a/src/frontends/pytorch/src/op/sum.cpp +++ b/src/frontends/pytorch/src/op/sum.cpp @@ -11,7 +11,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_sum(NodeContext& 
context) { +OutputVector translate_sum(const NodeContext& context) { num_inputs_check(context, 1, 3); bool keep_dims = false; ov::Output axes; diff --git a/src/frontends/pytorch/src/op/to.cpp b/src/frontends/pytorch/src/op/to.cpp index 6e5b0ebda639c4..2499b8346f5f02 100644 --- a/src/frontends/pytorch/src/op/to.cpp +++ b/src/frontends/pytorch/src/op/to.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_to(NodeContext& context) { +OutputVector translate_to(const NodeContext& context) { int dtype_idx; int memory_format_idx; if (context.get_input_size() == 5) { diff --git a/src/frontends/pytorch/src/op/topk.cpp b/src/frontends/pytorch/src/op/topk.cpp index 26addb856c6445..06916c4ea03e2f 100644 --- a/src/frontends/pytorch/src/op/topk.cpp +++ b/src/frontends/pytorch/src/op/topk.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_topk(NodeContext& context) { +OutputVector translate_topk(const NodeContext& context) { num_inputs_check(context, 5, 5); const auto input_tensor = context.get_input(0); const auto largest = context.const_input(3); diff --git a/src/frontends/pytorch/src/op/transpose.cpp b/src/frontends/pytorch/src/op/transpose.cpp index 60fee576613374..9a6cddb3ffb896 100644 --- a/src/frontends/pytorch/src/op/transpose.cpp +++ b/src/frontends/pytorch/src/op/transpose.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_transpose(NodeContext& context) { +OutputVector translate_transpose(const NodeContext& context) { num_inputs_check(context, 3, 3); auto dim0 = context.const_input(1); auto dim1 = context.const_input(2); diff --git a/src/frontends/pytorch/src/op/trilu.cpp b/src/frontends/pytorch/src/op/trilu.cpp index 1726cf2f895956..1ef4d50fd6affa 100644 --- a/src/frontends/pytorch/src/op/trilu.cpp +++ b/src/frontends/pytorch/src/op/trilu.cpp @@ -60,11 +60,11 @@ OutputVector translate_base_triu_tril(const NodeContext& context, bool upper) { } }; // namespace -OutputVector translate_triu(NodeContext& context) { +OutputVector translate_triu(const NodeContext& context) { return translate_base_triu_tril(context, true); }; -OutputVector translate_tril(NodeContext& context) { +OutputVector translate_tril(const NodeContext& context) { return translate_base_triu_tril(context, false); }; diff --git a/src/frontends/pytorch/src/op/unfold.cpp b/src/frontends/pytorch/src/op/unfold.cpp index 949f7991391b7e..e7aa129b2935cc 100644 --- a/src/frontends/pytorch/src/op/unfold.cpp +++ b/src/frontends/pytorch/src/op/unfold.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_unfold(NodeContext& context) { +OutputVector translate_unfold(const NodeContext& context) { num_inputs_check(context, 4, 4); // constants auto const_0 = context.mark_node(Constant::create(element::i32, Shape{}, {0})); diff --git a/src/frontends/pytorch/src/op/upsample.cpp b/src/frontends/pytorch/src/op/upsample.cpp index 111a07a28c70e9..484387b8f3931d 100644 --- a/src/frontends/pytorch/src/op/upsample.cpp +++ b/src/frontends/pytorch/src/op/upsample.cpp @@ -69,32 +69,32 @@ OutputVector base_translate_upsample(const NodeContext& context, }; } // namespace -OutputVector translate_upsample_linear1d(NodeContext& context) { +OutputVector translate_upsample_linear1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 1); }; -OutputVector translate_upsample_bilinear2d(NodeContext& context) { +OutputVector translate_upsample_bilinear2d(const 
NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 2); }; -OutputVector translate_upsample_trilinear3d(NodeContext& context) { +OutputVector translate_upsample_trilinear3d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 3); }; -OutputVector translate_upsample_nearest1d(NodeContext& context) { +OutputVector translate_upsample_nearest1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 1); }; -OutputVector translate_upsample_nearest2d(NodeContext& context) { +OutputVector translate_upsample_nearest2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 2); }; -OutputVector translate_upsample_nearest3d(NodeContext& context) { +OutputVector translate_upsample_nearest3d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 3); }; // bicubic is only supported for 2d in pytorch -OutputVector translate_upsample_bicubic2d(NodeContext& context) { +OutputVector translate_upsample_bicubic2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::CUBIC, 2); }; diff --git a/src/frontends/pytorch/src/op/var_mean.cpp b/src/frontends/pytorch/src/op/var_mean.cpp index 936038fecdcc2c..f021161722cd39 100644 --- a/src/frontends/pytorch/src/op/var_mean.cpp +++ b/src/frontends/pytorch/src/op/var_mean.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_var_mean(NodeContext& context) { +OutputVector translate_var_mean(const NodeContext& context) { num_inputs_check(context, 1, 4); auto data = context.get_input(0); bool unbiased = true; @@ -75,7 +75,7 @@ OutputVector translate_var_mean(NodeContext& context) { return {var, mean}; }; -OutputVector translate_var(NodeContext& context) { +OutputVector translate_var(const NodeContext& context) { auto res = translate_var_mean(context); return {res[0]}; } diff --git a/src/frontends/pytorch/src/op/where.cpp b/src/frontends/pytorch/src/op/where.cpp index 454d23938a2c0c..4a9de9f69edab8 100644 --- a/src/frontends/pytorch/src/op/where.cpp +++ b/src/frontends/pytorch/src/op/where.cpp @@ -14,7 +14,7 @@ namespace op { using namespace ov::op; -OutputVector translate_where(NodeContext& context) { +OutputVector translate_where(const NodeContext& context) { num_inputs_check(context, 1, 3); auto cond = context.get_input(0); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(1), "aten::where(cond) unsupported"); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 0da3b11e5b1c99..c42024fa36f4df 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -#define OP_CONVERTER(op) OutputVector op(NodeContext& node) +#define OP_CONVERTER(op) OutputVector op(const NodeContext& node) OP_CONVERTER(translate_adaptive_avg_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); @@ -25,6 +25,7 @@ OP_CONVERTER(translate_as_tensor); OP_CONVERTER(translate_avg_poolnd); OP_CONVERTER(translate_bool); OP_CONVERTER(translate_batch_norm); +OP_CONVERTER(translate_bitwise_not); OP_CONVERTER(translate_cat); OP_CONVERTER(translate_clamp); OP_CONVERTER(translate_constant); @@ -129,11 +130,12 @@ OP_CONVERTER(translate_zeros_like); } // namespace op -const std::map 
get_supported_ops() { +const std::map get_supported_ops() { return { {"aten::__and__", op::translate_1to1_match_2_inputs}, // TODO: cover numerical cases {"aten::__getitem__", op::translate_getitem}, {"aten::__not__", op::translate_1to1_match_1_inputs}, + {"aten::__or__", op::translate_1to1_match_2_inputs}, {"aten::_convolution", op::translate_convolution}, {"aten::_convolution_mode", op::translate_convolution_mode}, {"aten::_set_item", op::translate_set_item}, @@ -163,7 +165,9 @@ const std::map get_supported_ops() { {"aten::avg_pool1d", op::translate_avg_poolnd}, {"aten::avg_pool2d", op::translate_avg_poolnd}, {"aten::avg_pool3d", op::translate_avg_poolnd}, + {"aten::baddbmm", op::translate_addmm}, {"aten::batch_norm", op::translate_batch_norm}, + {"aten::bitwise_not", op::translate_bitwise_not}, {"aten::bmm", op::translate_1to1_match_2_inputs}, {"aten::Bool", op::translate_bool}, {"aten::cat", op::translate_cat}, @@ -254,6 +258,7 @@ const std::map get_supported_ops() { {"aten::narrow", op::translate_narrow}, {"aten::ne", op::translate_1to1_match_2_inputs_align_types}, {"aten::neg", op::translate_neg}, + {"aten::new_empty", op::translate_new_zeros}, {"aten::new_full", op::translate_new_full}, {"aten::new_ones", op::translate_new_ones}, {"aten::new_zeros", op::translate_new_zeros}, diff --git a/src/frontends/pytorch/src/op_table.hpp b/src/frontends/pytorch/src/op_table.hpp index 7a67c9101578b0..e15a988e98175b 100644 --- a/src/frontends/pytorch/src/op_table.hpp +++ b/src/frontends/pytorch/src/op_table.hpp @@ -10,7 +10,7 @@ namespace ov { namespace frontend { namespace pytorch { -const std::map get_supported_ops(); +const std::map get_supported_ops(); } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 025a325eebf939..376b466c0a268c 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -20,7 +20,7 @@ namespace pytorch { using namespace ov::op; TranslateSession::TranslateSession(const ov::frontend::InputModel::Ptr& input_model, - const std::map& translator_map) + const std::map& translator_map) : m_input_model(input_model), m_translator_map(translator_map), m_ov_model(nullptr) {} @@ -45,9 +45,9 @@ std::shared_ptr TranslateSession::convert_pytorch_model( const std::unordered_map& external_descriptors) { std::shared_ptr resulting_model; // define here to make a conversion in a nested scope { - ParameterVector parameters; - TensorMap tensor_map; // tensor map of the current context - std::set mutated_tensors; + auto parameters = std::make_shared(); + auto tensor_map = std::make_shared(); // tensor map of the current context + auto mutated_tensors = std::make_shared>(); // Go over all pytorch_model inputs and register them in the tensor map: auto inputs = pytorch_model->inputs(); @@ -74,7 +74,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( if (!input_node) { auto parameter = std::make_shared(type, pshape); encode_tensor_name(parameter->output(0), inputs.at(i), pytorch_model->get_input_debug_name(i)); - parameters.push_back(parameter); + parameters->push_back(parameter); input_node = parameter; auto order = pytorch_model->get_input_transpose_order(i); if (order.size() > 0 && !std::is_sorted(order.begin(), order.end())) { @@ -91,7 +91,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( input_node = transpose; } } - tensor_map[inputs.at(i)] = input_node; + (*tensor_map)[inputs.at(i)] = input_node; } 
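// Illustrative sketch (assumed signature): with tensor_map, parameters and mutated_tensors now held
// as shared pointers, the per-node context built below can be passed to translators as
// `const NodeContext&` while still recording side effects through this shared state, roughly:
//
//   NodeContext(std::shared_ptr<TorchDecoder> decoder,
//               const TensorMap& ext_tensor_map,
//               std::shared_ptr<TensorMap> tensor_map,
//               std::shared_ptr<ParameterVector> external_parameters,
//               std::shared_ptr<std::set<size_t>> mutated_tensors,
//               TranslateSession* translate_session);
//
// This is also why the explicit merge of context.get_mutated_tensors() disappears from this
// function below: mutations land directly in the shared set.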
auto node_visitor = [&](std::shared_ptr node) { @@ -102,7 +102,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( auto raw_inputs = node->inputs(); for (size_t i = 0; i < raw_inputs.size(); ++i) { auto input = raw_inputs.at(i); - if (tensor_map.find(input) == tensor_map.end()) { + if (tensor_map->find(input) == tensor_map->end()) { // Input refers value in the outer scope, need to create a new Parameter in the current scope // Linkage to external scope will be performed on the level of the parent operation (if or loop) // TODO: Eliminate duplication with the main code for Parameters creation @@ -111,18 +111,15 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // TODO: Use special API to set custom type specification auto parameter = std::make_shared(element::dynamic, ps); // TODO: Missing get_input_transpose_order handling for not trivial layouts - tensor_map[input] = parameter; + (*tensor_map)[input] = parameter; // set name of parameter to the index of node in the model encode_tensor_name(parameter->output(0), input); - parameters.push_back(parameter); + parameters->push_back(parameter); } } - auto context = NodeContext(node, &tensor_map, ¶meters, external_tensor_map, this); + auto context = NodeContext(node, external_tensor_map, tensor_map, parameters, mutated_tensors, this); auto converted_outputs = convert_node(context); - auto mutated_t = context.get_mutated_tensors(); - mutated_tensors.insert(mutated_t.begin(), mutated_t.end()); - auto fw_outputs = node->outputs(); // Ops with subgraphs or with mutated inputs may have more outputs after conversion compared to pytorch ones FRONT_END_OP_CONVERSION_CHECK(fw_outputs.size() <= converted_outputs.size(), @@ -134,10 +131,10 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // FIXME: Now it is not true for at least prim::Constant for (size_t i = 0; i < fw_outputs.size(); ++i) { size_t fw_tensor_id = node->output(i); - FRONT_END_GENERAL_CHECK(tensor_map.find(fw_tensor_id) == tensor_map.end(), + FRONT_END_GENERAL_CHECK(tensor_map->find(fw_tensor_id) == tensor_map->end(), "Duplicated producer for PT value with unique ID: ", fw_tensor_id); - tensor_map[fw_tensor_id] = converted_outputs[i]; + (*tensor_map)[fw_tensor_id] = converted_outputs[i]; encode_tensor_name(converted_outputs[i], fw_tensor_id, node->get_output_debug_name(i)); } }; @@ -148,14 +145,14 @@ std::shared_ptr TranslateSession::convert_pytorch_model( ResultVector results; for (size_t i = 0; i < pytorch_model->num_of_outputs(); ++i) { size_t id = pytorch_model->output(i); - if (tensor_map.find(id) == tensor_map.end()) { + if (tensor_map->find(id) == tensor_map->end()) { // Not found in this scope, adding Parameter to connect to external scope auto parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); encode_tensor_name(parameter->output(0), id); - parameters.push_back(parameter); - tensor_map[id] = parameter; + parameters->push_back(parameter); + (*tensor_map)[id] = parameter; } - auto ov_output = tensor_map[id]; + auto ov_output = tensor_map->at(id); auto order = pytorch_model->get_output_transpose_order(i); FRONT_END_GENERAL_CHECK(order.size() == 0 || std::is_sorted(order.begin(), order.end()), "Output strides have wrong order."); @@ -168,32 +165,32 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // Since parameters can be added we need to list all current parameters std::set param_names; - for (const auto& param : parameters) { + for (const auto& param : *parameters) { auto input_idx = 
decode_tensor_name(param->output(0)); param_names.insert(input_idx); } - for (const auto& tensor_id : mutated_tensors) { + for (const auto& tensor_id : *mutated_tensors) { if (param_names.count(tensor_id)) { - FRONT_END_GENERAL_CHECK(tensor_map.count(tensor_id), + FRONT_END_GENERAL_CHECK(tensor_map->count(tensor_id), "Tensor with id: ", tensor_id, " doesn't exist in tensor map."); // model input was mutated we need to make a result for it - auto mutated_tensor = tensor_map.at(tensor_id); + auto mutated_tensor = tensor_map->at(tensor_id); // empty external_tensor_map means this is main body of the model and we don't want to create // additional outputs in that case. if (mutated_tensor.get_target_inputs().empty() && !external_tensor_map.empty()) - results.push_back(std::make_shared(tensor_map.at(tensor_id))); + results.push_back(std::make_shared(tensor_map->at(tensor_id))); } } - resulting_model = std::make_shared(results, parameters); + resulting_model = std::make_shared(results, *parameters); // Did a conversion in a nested scope to automatically remove any holders of nodes except those in the graph } return resulting_model; } -OutputVector TranslateSession::convert_node(NodeContext& context) { +OutputVector TranslateSession::convert_node(const NodeContext& context) { try { auto it = m_translator_map.find(context.get_op_type()); if (it != m_translator_map.end()) { diff --git a/src/frontends/pytorch/src/translate_session.hpp b/src/frontends/pytorch/src/translate_session.hpp index e33ea31c63091d..4931c274984485 100644 --- a/src/frontends/pytorch/src/translate_session.hpp +++ b/src/frontends/pytorch/src/translate_session.hpp @@ -17,7 +17,7 @@ namespace pytorch { class TranslateSession { public: TranslateSession(const frontend::InputModel::Ptr& input_model, - const std::map& translator_map); + const std::map& translator_map); std::shared_ptr get_converted_model(); std::shared_ptr translate_graph(const frontend::InputModel::Ptr& input_model); @@ -38,10 +38,10 @@ class TranslateSession { size_t m_friendly_name_counter = 0; private: - OutputVector convert_node(NodeContext& context); + OutputVector convert_node(const NodeContext& context); const frontend::InputModel::Ptr m_input_model; - const std::map& m_translator_map; + const std::map& m_translator_map; std::shared_ptr m_ov_model; std::map>> m_counter_map; diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index 02b23e8e528c87..bdae3e9e75e397 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -142,6 +142,21 @@ element::Type convert_dtype(int64_t pt_type) { return TORCH_TO_OV_TYPE.at(pt_type); }; +Output apply_dtype(const NodeContext& context, size_t dtype_port, const Output& input_tensor) { + if (std::dynamic_pointer_cast( + context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { + auto dtype = convert_dtype(context.const_input(dtype_port)); + return context.mark_node(std::make_shared(input_tensor, dtype)); + } else if (const auto& fw_node = + cast_fw_node(context.get_input(static_cast(dtype_port)).get_node_shared_ptr(), "prim::dtype")) { + auto out_tensor = fw_node->input_value(0); + return context.mark_node(std::make_shared(input_tensor, out_tensor)); + } else { + FRONT_END_OP_CONVERSION_CHECK(false, "Couldn't get dtype input"); + } + return input_tensor; +}; + ov::op::PadType convert_pad(const std::string& pt_pad) { FRONT_END_OP_CONVERSION_CHECK(TORCH_AUTO_PAD_TO_OV.count(pt_pad), "Unknown pad: ", pt_pad); return 
TORCH_AUTO_PAD_TO_OV.at(pt_pad); @@ -162,7 +177,7 @@ std::shared_ptr concat_list_construct(std::shared_ptr input) { return input; } -OutputVector make_framework_node(NodeContext& context) { +OutputVector make_framework_node(const NodeContext& context) { auto schema = context.get_schema(); // TODO: properly process schema to get the actual position of mutable input // Hack. Can indicate mutable inputs, but can it be reliable? diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index c808b0957046c9..029b349c77bac2 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -41,11 +41,14 @@ std::shared_ptr get_axes_range(const NodeContext& context, int input_id); std::shared_ptr numel(const NodeContext& context, const Output& x); element::Type convert_dtype(int64_t dtype_value); + +Output apply_dtype(const NodeContext& context, size_t dtype_port, const Output& input_tensor); + op::PadType convert_pad(const std::string& pt_pad); std::shared_ptr concat_list_construct(std::shared_ptr input); -OutputVector make_framework_node(NodeContext& context); +OutputVector make_framework_node(const NodeContext& context); std::shared_ptr cast_fw_node(std::shared_ptr node, const std::string& type); @@ -60,8 +63,8 @@ void align_eltwise_input_types(const NodeContext& context, std::deque> get_list_as_outputs(const Output& start); namespace op { -template -OutputVector inplace_op(NodeContext& context) { +template +OutputVector inplace_op(const NodeContext& context) { auto translation_res = T(context); FRONT_END_OP_CONVERSION_CHECK(translation_res.size() == 1, "inplace_op function must be used on single output translators"); @@ -70,21 +73,21 @@ OutputVector inplace_op(NodeContext& context) { } template -OutputVector translate_1to1_match_1_inputs(NodeContext& context) { +OutputVector translate_1to1_match_1_inputs(const NodeContext& context) { num_inputs_check(context, 1, 1); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0), "Input should not be None."); return {context.mark_node(std::make_shared(context.get_input(0)))}; } template -OutputVector translate_1to1_match_2_inputs(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); return {context.mark_node(std::make_shared(context.get_input(0), context.get_input(1)))}; } template -OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); @@ -93,11 +96,11 @@ OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { return {context.mark_node(std::make_shared(lhs, rhs))}; } -inline OutputVector return_false_scalar(NodeContext& context) { +inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } -inline OutputVector skip_node(NodeContext& context) { +inline OutputVector skip_node(const NodeContext& context) { return {context.get_input(0).get_node_shared_ptr()}; } diff --git a/src/frontends/tensorflow/src/decoder_argdef.hpp b/src/frontends/tensorflow/src/decoder_argdef.hpp index 
5b01025ee4df7a..a188a8a5cb8890 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.hpp +++ b/src/frontends/tensorflow/src/decoder_argdef.hpp @@ -10,6 +10,8 @@ #include "openvino/frontend/tensorflow/decoder.hpp" namespace tensorflow { +class GraphDef; +class FunctionDef; class OpDef_ArgDef; } // namespace tensorflow @@ -19,14 +21,23 @@ namespace tensorflow { class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, const std::string& op_type) + explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + const std::string& op_type) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type) {} explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, const std::string& op_type, const std::string& producer_name) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type), m_producer_name(producer_name) {} @@ -49,6 +60,12 @@ class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { private: const ::tensorflow::OpDef_ArgDef* m_arg_def; + // For existence of OpDef_ArgDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of OpDef_ArgDef object corresponding to the body graph node, + // both GraphDef and FunctionDef objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; const std::string m_op_type; const std::string m_producer_name; }; diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index db1f113882451d..570fd7e7eebda5 100644 --- a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -12,6 +12,8 @@ #include "types.pb.h" namespace tensorflow { +class GraphDef; +class FunctionDef; class NodeDef; class AttrValue; } // namespace tensorflow @@ -29,7 +31,18 @@ void parse_producer_name(const std::string& producer_port_name, class DecoderProto : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderProto(const ::tensorflow::NodeDef* node_def) : m_node_def(node_def) {} + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(nullptr) {} + + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(func_def) {} ov::Any get_attribute(const std::string& name) const override; @@ -51,6 +64,12 @@ class DecoderProto : public ov::frontend::tensorflow::DecoderBase { private: std::vector<::tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const; const ::tensorflow::NodeDef* m_node_def; + // For existence of NodeDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of NodeDef object corresponding to the body graph node, + // both GraphDef and FunctionDef 
objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; }; } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp index 1fa836e3b036e1..a5e2fc1ae6c0c0 100644 --- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp @@ -45,12 +45,13 @@ class GraphIteratorProto : public GraphIterator { for (int input_ind = 0; input_ind < input_size; ++input_ind) { auto input_arg = &m_func_def->signature().input_arg(input_ind); m_input_names.push_back(input_arg->name()); - m_decoders.push_back(std::make_shared(input_arg, "input_arg")); + m_decoders.push_back(std::make_shared(input_arg, m_graph_def, m_func_def, "input_arg")); } // fill all node defs from library functions for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders.push_back(std::make_shared(&(m_func_def->node_def(node_ind)))); + m_decoders.push_back( + std::make_shared(&(m_func_def->node_def(node_ind)), m_graph_def, m_func_def)); } // fill all outputs from library functions @@ -60,7 +61,8 @@ class GraphIteratorProto : public GraphIterator { auto output_arg = &m_func_def->signature().output_arg(output_ind); m_output_names.push_back(output_arg->name()); auto producer_name = ret_map.at(output_arg->name()); - m_decoders.push_back(std::make_shared(output_arg, "output_arg", producer_name)); + m_decoders.push_back( + std::make_shared(output_arg, m_graph_def, m_func_def, "output_arg", producer_name)); } } @@ -76,7 +78,7 @@ class GraphIteratorProto : public GraphIterator { auto nodes_size = m_graph_def->node_size(); m_decoders.resize(static_cast(nodes_size)); for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind)); + m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind), m_graph_def); } // initialize a library map diff --git a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp b/src/frontends/tensorflow/src/op/lookup_table_insert.cpp deleted file mode 100644 index 3fb679e170be38..00000000000000 --- a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "common_op_table.hpp" -#include "openvino/frontend/tensorflow/node_context.hpp" -#include "utils.hpp" - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_lookup_table_insert_op(const ov::frontend::tensorflow::NodeContext& node) { - // auto-pruning of unsupported sub-graphs that contain - // operations working with dictionaries - default_op_checks(node, 3, {"LookupTableInsert", "LookupTableInsertV2"}); - return {}; -} - -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index dddab20848841f..26a2a332345f99 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -26,7 +26,6 @@ TF_OP_CONVERTER(translate_gru_block_cell_op); TF_OP_CONVERTER(translate_hash_table_op); TF_OP_CONVERTER(translate_iterator_get_next_op); TF_OP_CONVERTER(translate_iterator_op); -TF_OP_CONVERTER(translate_lookup_table_insert_op); TF_OP_CONVERTER(translate_partitioned_call_op); TF_OP_CONVERTER(translate_queue_dequeue_op); 
TF_OP_CONVERTER(translate_queue_dequeue_many_op); @@ -105,7 +104,7 @@ const std::map get_supported_ops() { {"AddN", translate_add_n_op}, {"ArgMax", translate_arg_max_op}, {"ArgMin", translate_arg_min_op}, - {"Assert", translate_assert_op}, + {"Assert", translate_no_op}, {"AvgPool", translate_avg_pool_op}, {"AvgPool3D", translate_avg_pool_op}, {"BatchMatMul", translate_batch_mat_mul_op}, @@ -164,8 +163,8 @@ const std::map get_supported_ops() { {"ListDiff", translate_list_diff_op}, {"LogSoftmax", translate_log_softmax_op}, {"Log1p", translate_log_1p_op}, - {"LookupTableInsert", translate_lookup_table_insert_op}, - {"LookupTableInsertV2", translate_lookup_table_insert_op}, + {"LookupTableInsert", translate_no_op}, + {"LookupTableInsertV2", translate_no_op}, {"LRN", translate_lrn_op}, {"MatMul", translate_mat_mul_op}, {"MatrixDiag", translate_matrix_diag_op}, diff --git a/src/frontends/tensorflow/tests/convert_tricky_models.cpp b/src/frontends/tensorflow/tests/convert_tricky_models.cpp index 6dd2a5a510b325..e1bde1af03784f 100644 --- a/src/frontends/tensorflow/tests/convert_tricky_models.cpp +++ b/src/frontends/tensorflow/tests/convert_tricky_models.cpp @@ -346,6 +346,7 @@ TEST_F(TransformationTestsF, ModelWithIteratorGetNextAndUnsupportedOp) { model_ref = make_shared(OutputVector{add}, ParameterVector{x, y}); } } + TEST_F(TransformationTestsF, ModelWithMultioutputBodyGraphNode) { { model = convert_model("partitioned_call2/partitioned_call2.pb"); } { @@ -376,3 +377,13 @@ TEST_F(TransformationTestsF, ModelWithEmptyTensorListAndPushBack) { model_ref = make_shared(OutputVector{recover_item}, ParameterVector{x}); } } + +TEST_F(TransformationTestsF, ModelWithAssertNode) { + { model = convert_model("model_with_assert/model_with_assert.pb"); } + { + auto x = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto y = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto add = make_shared(x, y); + model_ref = make_shared(OutputVector{add}, ParameterVector{x, y}); + } +} diff --git a/src/frontends/tensorflow/tests/convert_unsupported.cpp b/src/frontends/tensorflow/tests/convert_unsupported.cpp index 90060f9ec8216b..20edc81da9a6fa 100644 --- a/src/frontends/tensorflow/tests/convert_unsupported.cpp +++ b/src/frontends/tensorflow/tests/convert_unsupported.cpp @@ -2,18 +2,81 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include #include #include +#include #include "common_test_utils/ngraph_test_utils.hpp" +#include "tf_framework_node.hpp" #include "tf_utils.hpp" #include "utils.hpp" using namespace std; -using namespace ngraph; +using namespace ov; +using namespace ov::element; +using namespace ov::opset10; using namespace ov::frontend; +namespace { +class TestDecoder : public ov::frontend::DecoderBase { +public: + explicit TestDecoder(const std::string& op_type) : m_op_type(op_type) {} + + ov::Any get_attribute(const std::string& name) const override { + throw "Not implemented"; + } + + size_t get_input_size() const override { + throw "Not implemented"; + } + + void get_input_node(size_t input_port_idx, + std::string& producer_name, + size_t& producer_output_port_index) const override { + throw "Not implemented"; + } + + void get_input_node(size_t input_port_idx, + std::string& producer_name, + size_t& producer_output_port_index, + const OpTypeByName& op_type_by_name) const override { + throw "Not implemented"; + } + + const std::string& get_op_type() const override { + return m_op_type; + } + + const std::string& get_op_name() const override { + throw "Not implemented"; + } + 
+private: + const std::string m_op_type; +}; + +shared_ptr convert_model_partially(const string& model_path) { + FrontEndManager fem; + auto front_end = fem.load_by_framework(TF_FE); + if (!front_end) { + throw "TensorFlow Frontend is not initialized"; + } + auto model_filename = FrontEndTestUtils::make_model_path(string(TEST_TENSORFLOW_MODELS_DIRNAME) + model_path); + auto input_model = front_end->load(model_filename); + if (!input_model) { + throw "Input model is not read"; + } + auto model = front_end->convert_partially(input_model); + if (!model) { + throw "Model is not converted partially"; + } + + return model; +} +} // namespace + TEST(FrontEndConvertModelTest, test_unsupported_op) { FrontEndManager fem; FrontEnd::Ptr frontEnd; @@ -24,20 +87,20 @@ TEST(FrontEndConvertModelTest, test_unsupported_op) { string("relu_unsupported/relu_unsupported.pb")); ASSERT_NO_THROW(inputModel = frontEnd->load(model_filename)); ASSERT_NE(inputModel, nullptr); - shared_ptr function; - ASSERT_THROW(function = frontEnd->convert(inputModel), OpConversionFailure); - ASSERT_EQ(function, nullptr); - ASSERT_NO_THROW(function = frontEnd->decode(inputModel)); - ASSERT_THROW(frontEnd->convert(function), OpConversionFailure); - ASSERT_NO_THROW(function = frontEnd->convert_partially(inputModel)); - ASSERT_THROW(frontEnd->convert(function), OpConversionFailure); - - for (auto& node : function->get_ordered_ops()) { + shared_ptr model; + ASSERT_THROW(model = frontEnd->convert(inputModel), OpConversionFailure); + ASSERT_EQ(model, nullptr); + ASSERT_NO_THROW(model = frontEnd->decode(inputModel)); + ASSERT_THROW(frontEnd->convert(model), OpConversionFailure); + ASSERT_NO_THROW(model = frontEnd->convert_partially(inputModel)); + ASSERT_THROW(frontEnd->convert(model), OpConversionFailure); + + for (auto& node : model->get_ordered_ops()) { if (node->get_friendly_name() == "relu_0" && dynamic_pointer_cast(node)) { - function->replace_node(node, make_shared(node->input(0).get_source_output())); + model->replace_node(node, make_shared(node->input(0).get_source_output())); } } - ASSERT_NO_THROW(frontEnd->convert(function)); + ASSERT_NO_THROW(frontEnd->convert(model)); } TEST(FrontEndConvertModelTest, test_unsupported_tf1_while) { @@ -50,18 +113,35 @@ TEST(FrontEndConvertModelTest, test_unsupported_tf1_while) { string("model_tf1_while/model_tf1_while.pb")); ASSERT_NO_THROW(inputModel = frontEnd->load(model_filename)); ASSERT_NE(inputModel, nullptr); - shared_ptr function; + shared_ptr model; try { - function = frontEnd->convert(inputModel); + model = frontEnd->convert(inputModel); FAIL() << "TensorFlow 1 While is not supported in TF FE but conversion passed without errors. " "OpConversionFailure is expected."; } catch (const OpConversionFailure& error) { string error_message = error.what(); string ref_message = "No translator found for Enter node."; ASSERT_TRUE(error_message.find(ref_message) != string::npos); - ASSERT_EQ(function, nullptr); + ASSERT_EQ(model, nullptr); } catch (...) 
{ FAIL() << "Conversion of TensorFlow 1 While failed by wrong reason."; } } + +TEST_F(TransformationTestsF, ModelWithDynamicType) { + { model = convert_model_partially("dynamic_type_model/dynamic_type_model.pb"); } + { + auto x = make_shared(f32, Shape{2, 3}); + auto unsupported_op = make_shared(make_shared("Rrrr"), + ov::OutputVector{x}, + 1); + ASSERT_EQ(unsupported_op->get_output_element_type(0), ov::element::dynamic); + ov::Output const_one = make_shared(ov::element::f32, ov::Shape{}, 1); + const_one = make_shared(const_one, unsupported_op); + auto input_plus_one = make_shared(unsupported_op, const_one); + auto log1p_node = make_shared(input_plus_one); + ASSERT_EQ(log1p_node->get_output_element_type(0), ov::element::dynamic); + model_ref = make_shared(OutputVector{log1p_node}, ParameterVector{x}); + } +} diff --git a/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_dynamic_type_model.py b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_dynamic_type_model.py new file mode 100644 index 00000000000000..729e67fb17349d --- /dev/null +++ b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_dynamic_type_model.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# dynamic type is below FW node for unsupported operation +# + +import os +import sys + +import tensorflow as tf + + +def main(): + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name='x') + relu = tf.raw_ops.Relu(features=x) + tf.raw_ops.Log1p(x=relu) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + tf.io.write_graph(tf_net, os.path.join(sys.argv[1], "dynamic_type_model"), "dynamic_type_model.pb", False) + + with open(os.path.join(sys.argv[1], "dynamic_type_model", "dynamic_type_model.pb"), mode='rb') as file: + modelContent = file.read() + + modelContent = modelContent.replace(b"Relu", b"Rrrr") + + with open(os.path.join(sys.argv[1], "dynamic_type_model", "dynamic_type_model.pb"), mode='wb') as file: + file.write(modelContent) + + +if __name__ == "__main__": + main() diff --git a/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py new file mode 100644 index 00000000000000..79c6d84bf2a2c5 --- /dev/null +++ b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# model with Assert node generator +# + +import os +import sys + +import numpy as np +import tensorflow as tf + + +def main(): + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='x') + y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='y') + tf.raw_ops.AddV2(x=x, y=y) + shape1 = tf.raw_ops.Shape(input=x) + shape2 = tf.raw_ops.Shape(input=y) + equal = tf.raw_ops.Equal(x=shape1, y=shape2) + axis = tf.constant([0], dtype=tf.int32) + all_equal = tf.raw_ops.All(input=equal, axis=axis) + message = tf.constant("Shapes of operands are incompatible", dtype=tf.string) + tf.raw_ops.Assert(condition=all_equal, data=[message]) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + tf.io.write_graph(tf_net, os.path.join(sys.argv[1], 
"model_with_assert"), "model_with_assert.pb", False) + + +if __name__ == "__main__": + main() diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index eeec128e8c1de3..20fefeabd811b4 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -34,7 +34,6 @@ OP_T_CONVERTER(translate_direct_reduce_op); OP_CONVERTER(translate_add_n_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); -OP_CONVERTER(translate_assert_op); OP_CONVERTER(translate_avg_pool_op); OP_CONVERTER(translate_batch_mat_mul_op); OP_CONVERTER(translate_batch_to_space_nd_op); diff --git a/src/frontends/tensorflow_common/include/utils.hpp b/src/frontends/tensorflow_common/include/utils.hpp index ae6ab1b63597b7..06e68771aef80b 100644 --- a/src/frontends/tensorflow_common/include/utils.hpp +++ b/src/frontends/tensorflow_common/include/utils.hpp @@ -34,13 +34,26 @@ void set_node_name(const std::string& node_name, const std::shared_ptr& no bool is_conditional_edge(const std::string& input_tensor_name); template -ov::Output create_same_type_const_scalar(const ov::Output& same_type_output, T value) { +ov::Output create_same_type_const_scalar(const ov::Output& same_type_output, const T& value) { if (same_type_output.get_element_type().is_static()) { return std::make_shared(same_type_output.get_element_type(), ov::Shape{}, value); } else { ov::Output const_res = std::make_shared(ov::element::from(), ov::Shape{}, value); - const_res = std::make_shared(same_type_output, const_res); + const_res = std::make_shared(const_res, same_type_output); + return const_res; + } +} + +template +ov::Output create_same_type_const(const ov::Output& same_type_output, + const std::vector& value, + const ov::Shape& shape) { + if (same_type_output.get_element_type().is_static()) { + return std::make_shared(same_type_output.get_element_type(), shape, value); + } else { + ov::Output const_res = std::make_shared(ov::element::from(), shape, value); + const_res = std::make_shared(const_res, same_type_output); return const_res; } } diff --git a/src/frontends/tensorflow_common/src/op/assert.cpp b/src/frontends/tensorflow_common/src/op/assert.cpp deleted file mode 100644 index 5275e85a8c2edc..00000000000000 --- a/src/frontends/tensorflow_common/src/op/assert.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "common_op_table.hpp" -#include "openvino/core/validation_util.hpp" - -using namespace std; - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_assert_op(const NodeContext& node) { - default_op_checks(node, 1, {"Assert"}); - auto cond = node.get_input(0); - auto cond_const = get_constant_from_source(cond); - TENSORFLOW_OP_VALIDATION(node, - cond_const, - "[TensorFlow Frontend] The condition must be constant for further model conversion."); - auto cond_values = cond_const->cast_vector(); - TENSORFLOW_OP_VALIDATION(node, - cond_values.size() == 1, - "[TensorFlow Frontend] Incorrect model - the condition must have one element."); - TENSORFLOW_OP_VALIDATION(node, - cond_values[0], - "[TensorFlow Frontend] The condition must be true for further model conversion."); - return {}; -} -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git 
a/src/frontends/tensorflow_common/src/op/broadcast_args.cpp b/src/frontends/tensorflow_common/src/op/broadcast_args.cpp index 47e42cd473f1ff..3e5deb5d9d62e5 100644 --- a/src/frontends/tensorflow_common/src/op/broadcast_args.cpp +++ b/src/frontends/tensorflow_common/src/op/broadcast_args.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -26,18 +27,10 @@ OutputVector translate_broadcast_args_op(const NodeContext& node) { // pad the shortest shape value with minus ones // to take dynamic shapes into account - auto padded_s0 = - make_shared(s0, - diff1, - make_shared(diff1->get_element_type(), Shape{1}, std::vector{0}), - make_shared(s0.get_element_type(), Shape{}, std::vector{-1}), - ov::op::PadMode::CONSTANT); - auto padded_s1 = - make_shared(s1, - diff2, - make_shared(diff2->get_element_type(), Shape{1}, std::vector{0}), - make_shared(s1.get_element_type(), Shape{}, std::vector{-1}), - ov::op::PadMode::CONSTANT); + auto const_zero = create_same_type_const(diff1, std::vector{0}, Shape{1}); + auto const_minus_one = create_same_type_const_scalar(s0, -1); + auto padded_s0 = make_shared(s0, diff1, const_zero, const_minus_one, ov::op::PadMode::CONSTANT); + auto padded_s1 = make_shared(s1, diff2, const_zero, const_minus_one, ov::op::PadMode::CONSTANT); auto broadcasted_shape = make_shared(padded_s0, padded_s1); set_node_name(node.get_name(), broadcasted_shape); diff --git a/src/frontends/tensorflow_common/src/op/ctc_greedy_decoder.cpp b/src/frontends/tensorflow_common/src/op/ctc_greedy_decoder.cpp index 884ec38d573caf..49f2c35eeec46d 100644 --- a/src/frontends/tensorflow_common/src/op/ctc_greedy_decoder.cpp +++ b/src/frontends/tensorflow_common/src/op/ctc_greedy_decoder.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov; @@ -37,7 +38,7 @@ OutputVector translate_ctc_greedy_decoder_op(const NodeContext& node) { ctc_greedy_decoder = make_shared(inputs, sequence_length, merge_repeated, element::i64, element::i64); } else { - auto blank_index_const = make_shared(sequence_length.get_element_type(), Shape{}, blank_index); + auto blank_index_const = create_same_type_const_scalar(sequence_length, blank_index); ctc_greedy_decoder = make_shared(inputs, sequence_length, blank_index_const, @@ -48,7 +49,7 @@ OutputVector translate_ctc_greedy_decoder_op(const NodeContext& node) { // CTCGreedyDecoderSeqLen returns dense tensor holding the decoded results. // We need to transform this output into a sparse format. 
- auto minus_one_const = make_shared(ctc_greedy_decoder->output(0).get_element_type(), Shape{}, -1); + auto minus_one_const = make_shared(element::i64, Shape{}, -1); auto decoded_mask = make_shared(ctc_greedy_decoder->output(0), minus_one_const); auto decoded_indices = make_shared(decoded_mask, element::i64)->output(0); diff --git a/src/frontends/tensorflow_common/src/op/ctc_loss.cpp b/src/frontends/tensorflow_common/src/op/ctc_loss.cpp index 223fe36c087bd6..84f2f6914f35e5 100644 --- a/src/frontends/tensorflow_common/src/op/ctc_loss.cpp +++ b/src/frontends/tensorflow_common/src/op/ctc_loss.cpp @@ -33,33 +33,33 @@ OutputVector translate_ctc_loss_op(const NodeContext& node) { // since OpenVINO CTCLoss accepts only batch-major logist // we need to transpose it into [batch_size, time_size, num_classes] format // from [time_size, batch_size, num_classes] - ov::AxisVector logits_order = {1, 0, 2}; - logits = ov::frontend::tensorflow::make_transpose(logits, logits_order); + AxisVector logits_order = {1, 0, 2}; + logits = tensorflow::make_transpose(logits, logits_order); } // Transform decoded labels from the sparse format into dense format // Convert to the signed type since the mask with minus one is formed below - decoded_values = make_shared(decoded_values, ov::element::i64); + decoded_values = make_shared(decoded_values, element::i64); // OpenVINO ScatterND operation requires indices to be signed - decoded_indices = make_shared(decoded_indices, ov::element::i64); + decoded_indices = make_shared(decoded_indices, element::i64); // OpenVINO CTCLoss requires logit_length to be signed - logit_length = make_shared(logit_length, ov::element::i64); + logit_length = make_shared(logit_length, element::i64); - auto logits_shape = make_shared(logits, ov::element::i64); + auto logits_shape = make_shared(logits, element::i64); auto dense_shape = make_shared(logits_shape, - make_shared(ov::element::i64, ov::Shape{1}, 0), - make_shared(ov::element::i64, ov::Shape{1}, 2), - make_shared(ov::element::i64, ov::Shape{1}, 1)); - auto minus_one_value = make_shared(decoded_values.get_element_type(), ov::Shape{}, -1); + make_shared(element::i64, Shape{}, 0), + make_shared(element::i64, Shape{}, 2), + make_shared(element::i64, Shape{}, 1)); + auto minus_one_value = make_shared(element::i64, Shape{}, -1); auto init_decoded_values = make_shared(minus_one_value, dense_shape); auto decoded_values_dense = make_shared(init_decoded_values, decoded_indices, decoded_values); // Compute label_lenght for each batch auto minus_one_mask = make_shared(decoded_values_dense, minus_one_value); auto mask01 = make_shared(min_greater_zero, zero, minimum); diff --git a/src/frontends/tensorflow_common/src/op/fused_batch_norm.cpp b/src/frontends/tensorflow_common/src/op/fused_batch_norm.cpp index 08580f2ffcd1d4..7af6ce29497384 100644 --- a/src/frontends/tensorflow_common/src/op/fused_batch_norm.cpp +++ b/src/frontends/tensorflow_common/src/op/fused_batch_norm.cpp @@ -32,13 +32,13 @@ void generate_axes_range_except_c(const Output& x_rank, bool is_nhwc, Outp } void adjust_coeff(const Output& x_rank, - element::Type x_type, + const Output& x, const Output& coeff, Output& adjusted_coeff, bool is_nhwc) { // adjust types of the normalizing coefficients // they can vary for FusedBatchNormV2 and FusedBatchNormV3 operations - adjusted_coeff = make_shared(coeff, x_type)->output(0); + adjusted_coeff = make_shared(coeff, x)->output(0); if (is_nhwc) { return; @@ -81,7 +81,7 @@ void compute_batch_mean_and_variance(const Output& x, auto gather_axis = 
make_shared(element::i32, Shape{}, 0); auto needed_dim_values = make_shared(x_shape, reduce_axes, gather_axis); auto n = make_shared(needed_dim_values, gather_axis, false)->output(0); - n = make_shared(n, batch_variance.get_element_type())->output(0); + n = make_shared(n, batch_variance)->output(0); auto const_one = create_same_type_const_scalar(batch_variance, 1); auto bessel_correction = make_shared(n, const_one)->output(0); bessel_correction = make_shared(n, bessel_correction); @@ -155,10 +155,10 @@ void compute_fused_batch_norm_inference(const NodeContext& node, // adjust normalizing coefficients: scale, offset, mean, and variance auto x_rank = compute_subgraph_scalar_rank(x, element::i32, true); Output adjusted_scale, adjusted_offset, adjusted_mean, adjusted_variance; - adjust_coeff(x_rank, x.get_element_type(), scale, adjusted_scale, is_nhwc); - adjust_coeff(x_rank, x.get_element_type(), offset, adjusted_offset, is_nhwc); - adjust_coeff(x_rank, x.get_element_type(), mean, adjusted_mean, is_nhwc); - adjust_coeff(x_rank, x.get_element_type(), variance, adjusted_variance, is_nhwc); + adjust_coeff(x_rank, x, scale, adjusted_scale, is_nhwc); + adjust_coeff(x_rank, x, offset, adjusted_offset, is_nhwc); + adjust_coeff(x_rank, x, mean, adjusted_mean, is_nhwc); + adjust_coeff(x_rank, x, variance, adjusted_variance, is_nhwc); // perform the main part of the transformation // 1. subtract mean from the input @@ -197,8 +197,8 @@ void compute_fused_batch_norm_training(const NodeContext& node, // adjust normalizing coefficients: scale, offset auto x_rank = compute_subgraph_scalar_rank(x, element::i32, true); Output adjusted_scale, adjusted_offset; - adjust_coeff(x_rank, x.get_element_type(), scale, adjusted_scale, is_nhwc); - adjust_coeff(x_rank, x.get_element_type(), offset, adjusted_offset, is_nhwc); + adjust_coeff(x_rank, x, scale, adjusted_scale, is_nhwc); + adjust_coeff(x_rank, x, offset, adjusted_offset, is_nhwc); // generate axes for MVN operations Output mvn_axes; diff --git a/src/frontends/tensorflow_common/src/op/l2_loss.cpp b/src/frontends/tensorflow_common/src/op/l2_loss.cpp index 989952b5b6953f..e502d4d010f896 100644 --- a/src/frontends/tensorflow_common/src/op/l2_loss.cpp +++ b/src/frontends/tensorflow_common/src/op/l2_loss.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset10.hpp" +#include "utils.hpp" using namespace std; using namespace ov; @@ -19,7 +20,7 @@ OutputVector translate_l2_loss_op(const NodeContext& node) { default_op_checks(node, 1, {"L2Loss"}); auto input = node.get_input(0); - auto const_two = make_shared(input.get_element_type(), Shape{}, 2); + auto const_two = create_same_type_const_scalar(input, 2); auto squared_input = make_shared(input, const_two); auto input_rank = compute_subgraph_scalar_rank(input, element::i32, true); diff --git a/src/frontends/tensorflow_common/src/op/linspace.cpp b/src/frontends/tensorflow_common/src/op/linspace.cpp index 43902fc1a059a0..ab6d6770e255b7 100644 --- a/src/frontends/tensorflow_common/src/op/linspace.cpp +++ b/src/frontends/tensorflow_common/src/op/linspace.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -21,16 +22,17 @@ OutputVector translate_linspace_op(const NodeContext& node) { auto num = node.get_input(2); // compute delta value, i.e. 
distance between neighbor values of the result - auto const_one = make_shared(num.get_element_type(), Shape{}, 1); + auto const_one = create_same_type_const_scalar(num, 1); Output num_minus_one = make_shared(num, const_one); - num_minus_one = make_shared(num_minus_one, start.get_element_type()); + num_minus_one = make_shared(num_minus_one, start); Output delta = make_shared(stop, start); delta = make_shared(delta, num_minus_one); // generate a range of numbers [0, 1, ..., num) // to have exact numbers of elements equal to num - auto const_zero = make_shared(num.get_element_type(), Shape{}, 0); - auto range0_n = make_shared(const_zero, num, const_one, start.get_element_type()); + auto const_zero = create_same_type_const_scalar(num, 0); + Output range0_n = make_shared(const_zero, num, const_one, ov::element::f32); + range0_n = make_shared(range0_n, start); // compute the result Output linspace = make_shared(range0_n, delta); diff --git a/src/frontends/tensorflow_common/src/op/log1p.cpp b/src/frontends/tensorflow_common/src/op/log1p.cpp index 678e3a4c3904a4..6d47e9e7cb2f0b 100644 --- a/src/frontends/tensorflow_common/src/op/log1p.cpp +++ b/src/frontends/tensorflow_common/src/op/log1p.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -18,7 +19,7 @@ OutputVector translate_log_1p_op(const NodeContext& node) { // where x - input default_op_checks(node, 1, {"Log1p"}); auto x = node.get_input(0); - auto const_one = make_shared(x.get_element_type(), Shape{}, 1); + auto const_one = create_same_type_const_scalar(x, 1); auto x_plus_one = make_shared(x, const_one); auto log1p = make_shared(x_plus_one); set_node_name(node.get_name(), log1p); diff --git a/src/frontends/tensorflow_common/src/op/matrix_diag.cpp b/src/frontends/tensorflow_common/src/op/matrix_diag.cpp index 8657e05e2f1f17..d51777d049ddeb 100644 --- a/src/frontends/tensorflow_common/src/op/matrix_diag.cpp +++ b/src/frontends/tensorflow_common/src/op/matrix_diag.cpp @@ -16,6 +16,7 @@ namespace tensorflow { namespace op { OutputVector translate_matrix_diag_op(const NodeContext& node) { + default_op_checks(node, 1, {"MatrixDiag", "MATRIX_DIAG"}); // The translation of MatrixDiag to OpenVINO opset relies on padding of input tensor with zeros, // reshape to a special form and cutting of unneeded padding part. 
// Here is a basic idea described by an example, @@ -27,7 +28,6 @@ OutputVector translate_matrix_diag_op(const NodeContext& node) { // Reshape to tensor of a shape [12] equal to [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0] // Cut off last 3 elements and get [1, 0, 0, 0, 2, 0, 0, 0, 3] and reshape to [3, 3] // This idea is generalized to higher rank tensors - TENSORFLOW_OP_VALIDATION(node, node.get_input_size() > 0, "MatrixDiag must have at least one input."); // diagonal is the single input to MatrixDiag operation and has a shape [I, J, ..., M, N] auto diagonal = node.get_input(0); auto diagonal_type = diagonal.get_element_type(); diff --git a/src/frontends/tensorflow_common/src/op/no_op.cpp b/src/frontends/tensorflow_common/src/op/no_op.cpp index 9d8552b6f71c24..388d5c5c6af573 100644 --- a/src/frontends/tensorflow_common/src/op/no_op.cpp +++ b/src/frontends/tensorflow_common/src/op/no_op.cpp @@ -15,7 +15,7 @@ namespace op { OutputVector translate_no_op(const NodeContext& node) { // the operation does nothing in terms of data generation - default_op_checks(node, 0, {"NoOp", "SaveV2"}); + default_op_checks(node, 0, {"NoOp", "SaveV2", "Assert", "LookupTableInsert", "LookupTableInsertV2"}); return {}; } } // namespace op diff --git a/src/frontends/tensorflow_common/src/op/non_max_suppression.cpp b/src/frontends/tensorflow_common/src/op/non_max_suppression.cpp index e0b59ba3707d48..4e81994b73fd86 100644 --- a/src/frontends/tensorflow_common/src/op/non_max_suppression.cpp +++ b/src/frontends/tensorflow_common/src/op/non_max_suppression.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset10.hpp" +#include "utils.hpp" using namespace std; using namespace ov; @@ -26,10 +27,11 @@ Output normalize_selected_indices(const Output& ov_selected_indices, selected_indices = make_shared(selected_indices, axis)->output(0); if (pad_to_max_output_size) { - auto num_selected_indices = make_shared(selected_indices, max_output_size.get_element_type()); + Output num_selected_indices = make_shared(selected_indices); + num_selected_indices = make_shared(num_selected_indices, max_output_size); auto num_padded_elements = make_shared(max_output_size, num_selected_indices); - auto pad_element = make_shared(selected_indices.get_element_type(), Shape{}, 0); - auto pads_begin = make_shared(max_output_size.get_element_type(), Shape{1}, 0); + auto pad_element = create_same_type_const_scalar(selected_indices, 0); + auto pads_begin = create_same_type_const(max_output_size, vector{0}, Shape{1}); auto pad_mode = ov::op::PadMode::CONSTANT; selected_indices = make_shared(selected_indices, pads_begin, num_padded_elements, pad_element, pad_mode)->output(0); @@ -128,7 +130,7 @@ OutputVector translate_non_max_suppression_op(const NodeContext& node) { if (selected_scores) { tf_selected_scores = normalize_selected_indices(non_max_suppression->output(1), max_output_size, pad_to_max_output_size); - tf_selected_scores = make_shared(tf_selected_scores, boxes.get_element_type())->output(0); + tf_selected_scores = make_shared(tf_selected_scores, boxes)->output(0); results.push_back(tf_selected_scores); } diff --git a/src/frontends/tensorflow_common/src/op/pad.cpp b/src/frontends/tensorflow_common/src/op/pad.cpp index a8d0994453466e..037c5c49fad52f 100644 --- a/src/frontends/tensorflow_common/src/op/pad.cpp +++ b/src/frontends/tensorflow_common/src/op/pad.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset9.hpp" +#include "utils.hpp" using namespace std; using namespace ov; @@ -44,7 +45,7 @@ 
OutputVector translate_pad_op(const NodeContext& node) { default_op_checks(node, 2, {"Pad"}); auto input = node.get_input(0); auto paddings = node.get_input(1); - auto constant_value = make_shared(input.get_element_type(), Shape{}, 0); + auto constant_value = create_same_type_const_scalar(input, 0); return translate_pad_base_op(node, input, paddings, constant_value); } diff --git a/src/frontends/tensorflow_common/src/op/parallel_dynamic_stitch.cpp b/src/frontends/tensorflow_common/src/op/parallel_dynamic_stitch.cpp index 771168e6bf24ab..b1d50bc37dbdc2 100644 --- a/src/frontends/tensorflow_common/src/op/parallel_dynamic_stitch.cpp +++ b/src/frontends/tensorflow_common/src/op/parallel_dynamic_stitch.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset9.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset9; @@ -26,7 +27,6 @@ OutputVector translate_parallel_dynamic_stitch_op(const NodeContext& node) { int N = static_cast(in_size / 2); OutputVector indices_to_concat; OutputVector data_to_concat; - auto data_element_type = node.get_input(N).get_element_type(); auto const_minus_one = make_shared(ov::element::i32, Shape{1}, -1); auto const_zero = make_shared(ov::element::i32, Shape{1}, 0); auto const_one = make_shared(ov::element::i32, Shape{1}, 1); @@ -58,7 +58,7 @@ OutputVector translate_parallel_dynamic_stitch_op(const NodeContext& node) { auto indices = make_shared(indices_to_concat, 0); auto data_shape = make_shared(update, ov::element::i32); - auto zero = make_shared(data_element_type, Shape{}, 0); + auto zero = create_same_type_const_scalar(node.get_input(N), 0); auto zeros = make_shared(zero, data_shape); auto max_idx = make_shared(indices, Constant::create(element::i32, {1}, {0}), true); auto stop = make_shared(max_idx->output(0), const_one); diff --git a/src/frontends/tensorflow_common/src/op/random_uniform.cpp b/src/frontends/tensorflow_common/src/op/random_uniform.cpp index 170ced1f05473c..4b1b068380629b 100644 --- a/src/frontends/tensorflow_common/src/op/random_uniform.cpp +++ b/src/frontends/tensorflow_common/src/op/random_uniform.cpp @@ -40,10 +40,16 @@ ov::OutputVector translate_random_uniform_int_op(const NodeContext& node) { auto seed2 = node.get_attribute("seed2", 0); auto output_type = minval.get_element_type(); - auto random = std::make_shared(shape, minval, maxval, output_type, seed, seed2); - - set_node_name(node.get_name(), random); - return random->outputs(); + Output random; + if (output_type.is_static()) { + random = std::make_shared(shape, minval, maxval, output_type, seed, seed2); + } else { + random = std::make_shared(shape, minval, maxval, element::i64, seed, seed2); + random = make_shared(random, minval); + } + + set_node_name(node.get_name(), random.get_node_shared_ptr()); + return {random}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/range.cpp b/src/frontends/tensorflow_common/src/op/range.cpp index 27fe3c295a3a88..64cbf2094903aa 100644 --- a/src/frontends/tensorflow_common/src/op/range.cpp +++ b/src/frontends/tensorflow_common/src/op/range.cpp @@ -19,8 +19,15 @@ OutputVector translate_range_op(const NodeContext& node) { auto limit = node.get_input(1); auto delta = node.get_input(2); - auto range = make_shared(start, limit, delta, start.get_element_type()); - set_node_name(node.get_name(), range); + auto start_type = start.get_element_type(); + Output range; + if (start_type.is_static()) { + range = make_shared(start, limit, delta, start_type); + } else { + 
range = make_shared(start, limit, delta, element::f32); + range = make_shared(range, start); + } + set_node_name(node.get_name(), range.get_node_shared_ptr()); return {range}; } diff --git a/src/frontends/tensorflow_common/src/op/reciprocal.cpp b/src/frontends/tensorflow_common/src/op/reciprocal.cpp index d55c7158adb906..98d37ff9a337f5 100644 --- a/src/frontends/tensorflow_common/src/op/reciprocal.cpp +++ b/src/frontends/tensorflow_common/src/op/reciprocal.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -17,7 +18,7 @@ OutputVector translate_reciprocal_op(const NodeContext& node) { // computes element-wise 1/x, where x - input default_op_checks(node, 1, {"Reciprocal"}); auto x = node.get_input(0); - auto minus_one_const = make_shared(x.get_element_type(), Shape{}, -1); + auto minus_one_const = create_same_type_const_scalar(x, -1); auto reciprocal = make_shared(x, minus_one_const); set_node_name(node.get_name(), reciprocal); return {reciprocal}; diff --git a/src/frontends/tensorflow_common/src/op/rsqrt.cpp b/src/frontends/tensorflow_common/src/op/rsqrt.cpp index 4edcf7196a5571..12d7f98cb856f2 100644 --- a/src/frontends/tensorflow_common/src/op/rsqrt.cpp +++ b/src/frontends/tensorflow_common/src/op/rsqrt.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -14,11 +15,12 @@ namespace tensorflow { namespace op { OutputVector translate_rsqrt_op(const NodeContext& node) { + default_op_checks(node, 1, {"Rsqrt", "RSQRT"}); auto input = node.get_input(0); - auto ng_exponent = make_shared(input.get_element_type(), Shape{1}, -0.5f); - auto res = make_shared(input, ng_exponent); - set_node_name(node.get_name(), res); - return res->outputs(); + auto exponent = create_same_type_const_scalar(input, -0.5f); + auto rsqrt = make_shared(input, exponent); + set_node_name(node.get_name(), rsqrt); + return {rsqrt}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/scatter_nd.cpp b/src/frontends/tensorflow_common/src/op/scatter_nd.cpp index d008c073aff045..d7bd83c4dd2d88 100644 --- a/src/frontends/tensorflow_common/src/op/scatter_nd.cpp +++ b/src/frontends/tensorflow_common/src/op/scatter_nd.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -13,15 +14,16 @@ namespace frontend { namespace tensorflow { namespace op { OutputVector translate_scatter_nd_op(const NodeContext& node) { + default_op_checks(node, 3, {"ScatterNd", "SCATTER_ND"}); auto input_indices = node.get_input(0); auto updates = node.get_input(1); auto shape = node.get_input(2); - auto input_data = make_shared(updates.get_element_type(), Shape{1}, 0); + auto input_data = create_same_type_const(updates, vector{0}, Shape{1}); auto broadcast = make_shared(input_data, shape); - auto res = make_shared(broadcast, input_indices, updates); - set_node_name(node.get_name(), res); - return res->outputs(); + auto scatter_nd = make_shared(broadcast, input_indices, updates); + set_node_name(node.get_name(), scatter_nd); + return {scatter_nd}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/segment_sum.cpp b/src/frontends/tensorflow_common/src/op/segment_sum.cpp index 36a1c6ca379480..f64b14bbd30d56 100644 --- 
a/src/frontends/tensorflow_common/src/op/segment_sum.cpp +++ b/src/frontends/tensorflow_common/src/op/segment_sum.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset9.hpp" +#include "utils.hpp" using namespace std; using namespace ov; @@ -18,6 +19,10 @@ OutputVector translate_segment_sum_op(const NodeContext& node) { auto data = node.get_input(0); auto segment_ids = node.get_input(1); + // create auxiliary constants + auto const_one = create_same_type_const_scalar(segment_ids, 1); + auto const_zero = create_same_type_const_scalar(segment_ids, 0); + // compute SegmentSum using EmbeddingSegmentSum // for this prepare all the required inputs auto indices_type = segment_ids.get_element_type(); @@ -25,18 +30,14 @@ OutputVector translate_segment_sum_op(const NodeContext& node) { // do not forget that segment ids are counting from zero auto reduction_axis = make_shared(element::i32, Shape{1}, 0); auto num_segments_minus1 = make_shared(segment_ids, reduction_axis, false); - auto one = make_shared(indices_type, Shape{}, 1); - auto num_segments = make_shared(num_segments_minus1, one); + auto num_segments = make_shared(num_segments_minus1, const_one); // 2. generate indices input for EmbeddingSegmentSum // that will collect slices consequently from data for each segment auto squeeze_axis = make_shared(element::i32, Shape{1}, 0); auto segment_ids_shape = make_shared(segment_ids, indices_type); auto num_indices = make_shared(segment_ids_shape, squeeze_axis); - auto indices = make_shared(make_shared(indices_type, ov::Shape{}, 0), - num_indices, - make_shared(indices_type, ov::Shape{}, 1), - indices_type); + auto indices = make_shared(const_zero, num_indices, const_one, indices_type); auto emb_segment_sum = make_shared(data, indices, segment_ids, num_segments); set_node_name(node.get_name(), emb_segment_sum); diff --git a/src/frontends/tensorflow_common/src/op/slice.cpp b/src/frontends/tensorflow_common/src/op/slice.cpp index 243a8259367320..24ceaef5b505a9 100644 --- a/src/frontends/tensorflow_common/src/op/slice.cpp +++ b/src/frontends/tensorflow_common/src/op/slice.cpp @@ -4,6 +4,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset8.hpp" +#include "utils.hpp" using namespace std; using namespace ov::opset8; @@ -19,25 +20,28 @@ OutputVector translate_slice_op(const NodeContext& node) { auto start = node.get_input(1); auto size = node.get_input(2); + // create axiliary constants + auto const_one = create_same_type_const_scalar(start, 1); + auto const_zero = create_same_type_const_scalar(start, 0); + // compute stop values in case non-negative sizes auto stop_pos = make_shared(start, size); // compute stop values in case negative sizes // since TensorFlow supports only -1 among negative sizes // assign stop values to the data shape - auto stop_neg = make_shared(input, size.get_element_type()); + Output stop_neg = make_shared(input); + stop_neg = make_shared(stop_neg, size); // select the correct stop value based on a sign of size value - auto zeros = make_shared(size.get_element_type(), Shape{}, 0); - auto negative_sizes_mask = make_shared(size, zeros); + auto negative_sizes_mask = make_shared(size, const_zero); // TODO: investigate if we can simplify and replace Select with FloorMod operation // like FloorMod(size, input_shape) auto stop = make_shared(x_is_zero, one, y); - auto res = make_shared(x, select); - set_node_name(node.get_name(), res); - return res->outputs(); + // create auxiliary constants + auto const_zero = create_same_type_const_scalar(x, 0); + 
auto const_one = create_same_type_const_scalar(x, 1); + + auto x_is_zero = make_shared(x, const_zero); + auto select = make_shared