diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index c6c2a0453f4c9c..76c5e3b978464f 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -105,11 +105,11 @@ jobs: workingDirectory: $(BUILD_DIR) displayName: 'Install' - - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid --gtest_output=xml:TEST-NGraphUT.xml + - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml displayName: 'nGraph UT' continueOnError: false - - script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml + - script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_filter=-MKLDNNGraphStructureTests.TestNoRedundantReordersBeforeDWConvolution:TestConvolution/MKLDNNGraphConvolutionTests.TestsConvolution/0:TestConvolutionDefaultPrimitivesPriority/MKLDNNGraphConvolutionTests.TestsConvolution/0 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml displayName: 'IE UT old' continueOnError: false diff --git a/CMakeLists.txt b/CMakeLists.txt index fa0b9ac9119e84..105181b6f85edd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # @@ -142,6 +142,7 @@ function(openvino_developer_export_targets) "A list of OpenVINO exported components" FORCE) endfunction() +add_subdirectory(thirdparty) add_subdirectory(openvino) build_ngraph() add_subdirectory(inference-engine) diff --git a/cmake/toolchains/ia32.linux.toolchain.cmake b/cmake/toolchains/ia32.linux.toolchain.cmake new file mode 100644 index 00000000000000..13c090fb9a73d7 --- /dev/null +++ b/cmake/toolchains/ia32.linux.toolchain.cmake @@ -0,0 +1,27 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(CMAKE_CXX_FLAGS_INIT "-m32") +set(CMAKE_C_FLAGS_INIT "-m32") + +set(CMAKE_SHARED_LINKER_FLAGS_INIT "-m32") +set(CMAKE_MODULE_LINKER_FLAGS_INIT "-m32") +set(CMAKE_EXE_LINKER_FLAGS_INIT "-m32") + +# Hints for OpenVINO + +macro(_set_if_not_defined var val) + if(NOT DEFINED ${var}) + set(${var} ${val} CACHE BOOL "" FORCE) + endif() +endmacro() + +# need libusb 32-bits version +_set_if_not_defined(ENABLE_VPU OFF) + +# _mm_loadl_epi64 is not defined +_set_if_not_defined(ENABLE_SSE42 OFF) + +# fix conversion from uint64_t / int64_t to size_t +_set_if_not_defined(NGRAPH_ONNX_IMPORT_ENABLE OFF) diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake new file mode 100644 index 00000000000000..484a4c8b282ae3 --- /dev/null +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -0,0 +1,39 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# +# Flags for 3rd party projects +# + +set(use_static_runtime ON) + +if(use_static_runtime) + foreach(lang C CXX) + foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") + set(flag_var "CMAKE_${lang}_FLAGS${build_type}") + string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endforeach() + endforeach() +endif() + +function(onecoreuap_set_runtime var) + set(${var} ${use_static_runtime} CACHE BOOL "" FORCE) +endfunction() + +# ONNX +onecoreuap_set_runtime(ONNX_USE_MSVC_STATIC_RUNTIME) +# pugixml +onecoreuap_set_runtime(STATIC_CRT) +# protobuf 
+onecoreuap_set_runtime(protobuf_MSVC_STATIC_RUNTIME) +# clDNN +onecoreuap_set_runtime(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME) +# google-test +if(use_static_runtime) + set(gtest_force_shared_crt OFF CACHE BOOL "" FORCE) +else() + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +endif() + +unset(use_static_runtime) diff --git a/cmake/toolchains/onecoreuap.toolchain.cmake b/cmake/toolchains/onecoreuap.toolchain.cmake index 8902d6b9acca58..38a29860c749b2 100644 --- a/cmake/toolchains/onecoreuap.toolchain.cmake +++ b/cmake/toolchains/onecoreuap.toolchain.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # @@ -68,31 +68,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${linker_flags}") unset(linker_flags) # -# Flags for 3rd party projects +# Static runtime to overcome apiValidator tool restrictions # -set(use_static_runtime ON) - -if(use_static_runtime) - foreach(lang C CXX) - foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") - set(flag_var "CMAKE_${lang}_FLAGS${build_type}") - string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endforeach() - endforeach() -endif() - -function(onecoreuap_set_runtime var) - set(${var} ${use_static_runtime} CACHE BOOL "" FORCE) -endfunction() - -# ONNX -onecoreuap_set_runtime(ONNX_USE_MSVC_STATIC_RUNTIME) -# pugixml -onecoreuap_set_runtime(STATIC_CRT) -# protobuf -onecoreuap_set_runtime(protobuf_MSVC_STATIC_RUNTIME) -# clDNN -onecoreuap_set_runtime(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME) - -unset(use_static_runtime) +include("${CMAKE_CURRENT_LIST_DIR}/mt.runtime.win32.toolchain.cmake") diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md index 0cacca13451ad7..8037e6e95a29ee 100644 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ b/docs/HOWTO/Custom_Layers_Guide.md @@ -369,7 +369,6 @@ python3 mri_reconstruction_demo.py \ - [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md) - [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md) - [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index) -- [Inference Engine Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic) - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). ## Converting Models: diff --git a/docs/IE_DG/API_Changes.md b/docs/IE_DG/API_Changes.md index 41681e58d8a3ad..c23c427e6edf38 100644 --- a/docs/IE_DG/API_Changes.md +++ b/docs/IE_DG/API_Changes.md @@ -156,7 +156,7 @@ The sections below contain detailed list of changes made to the Inference Engine ### Deprecated API - **Myriad Plugin API:** + **MYRIAD Plugin API:** * VPU_CONFIG_KEY(IGNORE_IR_STATISTIC) diff --git a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md index 0999679ae0caa2..b6728e65bc402d 100644 --- a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md +++ b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md @@ -24,11 +24,11 @@ The `ngraph::onnx_import::Node` class represents a node in ONNX model. It provid New operator registration must happen before the ONNX model is read, for example, if an ONNX model uses the 'CustomRelu' operator, `register_operator("CustomRelu", ...)` must be called before InferenceEngine::Core::ReadNetwork. Re-registering ONNX operators within the same process is supported. During registration of the existing operator, a warning is printed. 
-The example below demonstrates an examplary model that requires previously created 'CustomRelu' operator: +The example below demonstrates an exemplary model that requires previously created 'CustomRelu' operator: @snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:model -For a reference on how to create a graph with nGraph operations, visit [nGraph tutorial](../nGraphTutorial.md). +For a reference on how to create a graph with nGraph operations, visit [Custom nGraph Operations](AddingNGraphOps.md). For a complete list of predefined nGraph operators, visit [available operations sets](../../ops/opset.md). If operator is no longer needed, it can be unregistered by calling `unregister_operator`. The function takes three arguments `op_type`, `version`, and `domain`. diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md index 788c2d580324a9..60497bbebdf362 100644 --- a/docs/IE_DG/InferenceEngine_QueryAPI.md +++ b/docs/IE_DG/InferenceEngine_QueryAPI.md @@ -32,7 +32,8 @@ MYRIAD.1.4-ma2480 FPGA.0 FPGA.1 CPU -GPU +GPU.0 +GPU.1 ... ``` diff --git a/docs/IE_DG/Introduction.md b/docs/IE_DG/Introduction.md index efab88d5dd95e2..6d3d5be66c608b 100644 --- a/docs/IE_DG/Introduction.md +++ b/docs/IE_DG/Introduction.md @@ -122,7 +122,4 @@ The open source version is available in the [OpenVINO™ toolkit GitHub reposito - [Intel® Deep Learning Deployment Toolkit Web Page](https://software.intel.com/en-us/computer-vision-sdk) -[scheme]: img/workflow_steps.png - -#### Optimization Notice -For complete information about compiler optimizations, see our [Optimization Notice](https://software.intel.com/en-us/articles/optimization-notice#opt-en). +[scheme]: img/workflow_steps.png \ No newline at end of file diff --git a/docs/IE_DG/Optimization_notice.md b/docs/IE_DG/Optimization_notice.md deleted file mode 100644 index 3c128d95b6c5bc..00000000000000 --- a/docs/IE_DG/Optimization_notice.md +++ /dev/null @@ -1,3 +0,0 @@ -# Optimization Notice {#openvino_docs_IE_DG_Optimization_notice} - -![Optimization_notice](img/opt-notice-en_080411.gif) \ No newline at end of file diff --git a/docs/IE_DG/Samples_Overview.md b/docs/IE_DG/Samples_Overview.md index 245fa68e900e80..1eeedca35b9f52 100644 --- a/docs/IE_DG/Samples_Overview.md +++ b/docs/IE_DG/Samples_Overview.md @@ -43,7 +43,7 @@ To run the sample applications, you can use images and videos from the media fil ## Samples that Support Pre-Trained Models -You can download the [pre-trained models](@ref omz_models_intel_index) using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or from [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). ## Build the Sample Applications @@ -127,7 +127,7 @@ You can also build a generated solution manually. For example, if you want to bu Microsoft Visual Studio and open the generated solution file from the `C:\Users\\Documents\Intel\OpenVINO\inference_engine_cpp_samples_build\Samples.sln` directory. 
-### Build the Sample Applications on macOS* +### Build the Sample Applications on macOS* The officially supported macOS* build environment is the following: diff --git a/docs/IE_DG/ShapeInference.md b/docs/IE_DG/ShapeInference.md index a7cdddb784d676..ea86911ff397e0 100644 --- a/docs/IE_DG/ShapeInference.md +++ b/docs/IE_DG/ShapeInference.md @@ -1,6 +1,36 @@ Using Shape Inference {#openvino_docs_IE_DG_ShapeInference} ========================================== +OpenVINO™ provides the following methods for runtime model reshaping: + +* **Set a new input shape** with the `InferenceEngine::CNNNetwork::reshape` method.
+ The `InferenceEngine::CNNNetwork::reshape` method updates input shapes and propagates them down to the outputs of the model through all intermediate layers. + +> **NOTES**: +> - Starting with the 2021.1 release, the Model Optimizer converts topologies keeping shape-calculating sub-graphs by default, which enables correct shape propagation during reshaping in most cases. +> - Older versions of IRs are not guaranteed to reshape successfully. Please regenerate them with the Model Optimizer of the latest version of OpenVINO™.
+> - If an ONNX model does not have a fully defined input shape and the model was imported with the ONNX importer, reshape the model before loading it to the plugin. + +* **Set a new batch dimension value** with the `InferenceEngine::CNNNetwork::setBatchSize` method.
+ The meaning of a model batch may vary depending on the model design. + This method does not deduce batch placement for inputs from the model architecture. + It assumes that the batch is placed at the zero index in the shape for all inputs and uses the `InferenceEngine::CNNNetwork::reshape` method to propagate updated shapes through the model. + + The method transforms the model before a new shape propagation to relax a hard-coded batch dimension in the model, if any. + + Use `InferenceEngine::CNNNetwork::reshape` instead of `InferenceEngine::CNNNetwork::setBatchSize` to set new input shapes for the model in case the model has: + * Multiple inputs with different zero-index dimension meanings + * Input without a batch dimension + * 0D, 1D, or 3D shape + + The `InferenceEngine::CNNNetwork::setBatchSize` method is a high-level API method that wraps the `InferenceEngine::CNNNetwork::reshape` method call and works for trivial models from the batch placement standpoint. + Use `InferenceEngine::CNNNetwork::reshape` for other models. + + Using the `InferenceEngine::CNNNetwork::setBatchSize` method for models with a non-zero index batch placement or for models with inputs that do not have a batch dimension may lead to undefined behaviour. + +You can change input shapes multiple times using the `InferenceEngine::CNNNetwork::reshape` and `InferenceEngine::CNNNetwork::setBatchSize` methods in any order. +If a model has a hard-coded batch dimension, use `InferenceEngine::CNNNetwork::setBatchSize` first to change the batch, then call `InferenceEngine::CNNNetwork::reshape` to update other dimensions, if needed. + Inference Engine takes three kinds of a model description as an input, which are converted into an `InferenceEngine::CNNNetwork` object: 1. [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) through `InferenceEngine::Core::ReadNetwork` 2. [ONNX model](../IE_DG/OnnxImporterTutorial.md) through `InferenceEngine::Core::ReadNetwork` @@ -23,33 +53,7 @@ for (const auto & parameter : parameters) { To feed input data of a shape that is different from the model input shape, reshape the model first. -OpenVINO™ provides the following methods for runtime model reshaping: - -* **Set a new input shape** with the `InferenceEngine::CNNNetwork::reshape` method.
- The `InferenceEngine::CNNNetwork::reshape` method updates input shapes and propagates them down to the outputs of the model through all intermediate layers. - You can reshape a model multiple times like in this application scheme: - ``` - ReadNetwork -> reshape(input_1_shape) -> LoadNetwork -> infer(input_1) - \ - -> reshape(input_2_shape) -> LoadNetwork -> infer(input_2) - ``` - > **NOTES**: - > - Starting with the 2021.1 release, the Model Optimizer converts topologies keeping shape-calculating sub-graphs by default, which enables correct shape propagation during reshaping. - > - Older versions of IRs are not guaranteed to reshape successfully. Please regenerate them with the Model Optimizer of the latest version of OpenVINO™.
- > - If an ONNX model does not have a fully defined input shape and the model was imported with the ONNX importer, reshape the model before loading it to the plugin. -* **Set a new batch dimension value** with the `InferenceEngine::CNNNetwork::setBatchSize` method.
- The meaning of a model batch may vary depending on the model design. - The `InferenceEngine::CNNNetwork::setBatchSize` method deduces the index of a batch dimension based only on the input rank. - This method does not work for models with a non-zero index batch placement or models with inputs without a batch dimension. - The batch-setting algorithm does not involve the shape inference mechanism. - Batch of input and output shapes for all layers is set to a new batch value without layer validation. - It may cause both positive and negative side effects. - Due to the limitations described above, the current method is not recommended to use. - If you need to set a new batch size for the model, use the `CNNNetwork::reshape` method instead. - -Do not use runtime reshaping methods simultaneously, especially do not call the `CNNNetwork::reshape` method after you use `InferenceEngine::CNNNetwork::setBatchSize`. -The `InferenceEngine::CNNNetwork::setBatchSize` method causes irreversible conversion of the internal model representation into the legacy model representation. -The method does not use nGraph for shape inference which leads to reduced reshape opportunities and may affect the performance of the model. +Once the input shape of `InferenceEngine::CNNNetwork` is set, call the `InferenceEngine::Core::LoadNetwork` method to get an `InferenceEngine::ExecutableNetwork` object for inference with updated shapes. There are other approaches to reshape the model during the stage of IR generation or [nGraph::Function creation](../nGraph_DG/build_function.md). @@ -62,8 +66,8 @@ Shape collision during shape propagation may be a sign that a new shape does not Changing the model input shape may result in intermediate operations shape collision. Examples of such operations: -- `Reshape` operation with a hard-coded output shape value -- `MatMul` operation with the `Const` second input cannot be resized by spatial dimensions due to operation semantics +- [`Reshape` operation](../ops/shape/Reshape_1.md) with a hard-coded output shape value +- [`MatMul` operation](../ops/matrix/MatMul_1.md) with the `Const` second input cannot be resized by spatial dimensions due to operation semantics Model structure and logic should not change significantly after model reshaping. - The Global Pooling operation is commonly used to reduce output feature map of classification models output. diff --git a/docs/IE_DG/protecting_model_guide.md b/docs/IE_DG/protecting_model_guide.md index 99b7836b1b25d1..2074d2230146cb 100644 --- a/docs/IE_DG/protecting_model_guide.md +++ b/docs/IE_DG/protecting_model_guide.md @@ -59,5 +59,4 @@ should be called with `weights` passed as an empty `Blob`. - Inference Engine Developer Guide: [Inference Engine Developer Guide](Deep_Learning_Inference_Engine_DevGuide.md) - For more information on Sample Applications, see the [Inference Engine Samples Overview](Samples_Overview.md) - For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index) -- For information on Inference Engine Tutorials, see the [Inference Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic) - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). 
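As a rough illustration of the reshape-then-load flow described in the ShapeInference.md changes above, a minimal C++ sketch follows; the model path, device name, and new batch value are placeholder assumptions, and the batch is assumed to sit at the zero index of the input shape:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;

    // Read the model ("model.xml" is a placeholder path)
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");

    // Take the current input shapes and set a new batch value (8 here) on the first input,
    // assuming the batch is the zero-index dimension
    InferenceEngine::ICNNNetwork::InputShapes shapes = network.getInputShapes();
    shapes.begin()->second[0] = 8;

    // Propagate the new shapes down to the outputs through all intermediate layers
    network.reshape(shapes);

    // Load the reshaped network to a device to get an ExecutableNetwork with updated shapes
    InferenceEngine::ExecutableNetwork executable = core.LoadNetwork(network, "CPU");
    return 0;
}
```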
diff --git a/docs/IE_DG/supported_plugins/CL_DNN.md b/docs/IE_DG/supported_plugins/CL_DNN.md index a25012bf0732a0..a8cfbc579128f9 100644 --- a/docs/IE_DG/supported_plugins/CL_DNN.md +++ b/docs/IE_DG/supported_plugins/CL_DNN.md @@ -1,9 +1,30 @@ GPU Plugin {#openvino_docs_IE_DG_supported_plugins_CL_DNN} ======= -The GPU plugin uses the Intel® Compute Library for Deep Neural Networks ([clDNN](https://01.org/cldnn)) to infer deep neural networks. -clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics and Intel® Iris® Graphics. -For an in-depth description of clDNN, see: [clDNN sources](https://github.com/intel/clDNN) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics). +The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks. +clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics. +For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/inference-engine/src/cldnn_engine) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics). + +## Device Naming Convention +* Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered. +* If the system has an integrated GPU, it always has id=0 ("GPU.0"). +* Other GPUs have undefined order that depends on the GPU driver. +* "GPU" is an alias for "GPU.0" +* If the system doesn't have an integrated GPU, then devices are enumerated starting from 0. + +For demonstration purposes, see the [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md) that can print out the list of available devices with associated indices. Below is an example output (truncated to the device names only): + +```sh +./hello_query_device +Available devices: + Device: CPU +... + Device: GPU.0 +... + Device: GPU.1 +... + Device: HDDL +``` ## Optimizations @@ -92,7 +113,7 @@ When specifying key values as raw strings (that is, when using Python API), omit | `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | | `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) | | `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory | -| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behaviour.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | +| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put an additional load on the CPU. If the CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates the bare minimum of streams needed to improve performance; this is the most portable option if you are not sure how many resources your target machine has (and what the optimal number of streams would be).
A positive integer value creates the requested number of streams. | | `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.| ## Note on Debug Capabilities of the GPU Plugin diff --git a/docs/IE_DG/supported_plugins/HDDL.md b/docs/IE_DG/supported_plugins/HDDL.md index f935c42cc21a3e..9154f1d3f3039a 100644 --- a/docs/IE_DG/supported_plugins/HDDL.md +++ b/docs/IE_DG/supported_plugins/HDDL.md @@ -21,7 +21,7 @@ For the "Supported Networks", please reference to [MYRIAD Plugin](MYRIAD.md) See VPU common configuration parameters for the [VPU Plugins](VPU.md). When specifying key values as raw strings (that is, when using Python API), omit the `KEY_` prefix. -In addition to common parameters for Myriad plugin and HDDL plugin, HDDL plugin accepts the following options: +In addition to common parameters for MYRIAD plugin and HDDL plugin, HDDL plugin accepts the following options: | Parameter Name | Parameter Values | Default | Description | | :--- | :--- | :--- | :--- | diff --git a/docs/IE_DG/supported_plugins/MULTI.md b/docs/IE_DG/supported_plugins/MULTI.md index a3166c3de8e956..a6b4aaefc9f1c9 100644 --- a/docs/IE_DG/supported_plugins/MULTI.md +++ b/docs/IE_DG/supported_plugins/MULTI.md @@ -47,11 +47,13 @@ Inference Engine now features a dedicated API to enumerate devices and their cap ```sh ./hello_query_device Available devices: - Device: CPU + Device: CPU ... - Device: GPU + Device: GPU.0 ... - Device: HDDL + Device: GPU.1 +... + Device: HDDL ``` Simple programmatic way to enumerate the devices and use with the multi-device is as follows: diff --git a/docs/IE_DG/supported_plugins/VPU.md b/docs/IE_DG/supported_plugins/VPU.md index 7c04290f7dd16d..189a23b5a94f20 100644 --- a/docs/IE_DG/supported_plugins/VPU.md +++ b/docs/IE_DG/supported_plugins/VPU.md @@ -9,12 +9,12 @@ This chapter provides information on the Inference Engine plugins that enable in ## Known Layers Limitations -* `'ScaleShift'` layer is supported for zero value of `'broadcast'` attribute only. -* `'CTCGreedyDecoder'` layer works with `'ctc_merge_repeated'` attribute equal 1. -* `'DetectionOutput'` layer works with zero values of `'interpolate_orientation'` and `'num_orient_classes'` parameters only. -* `'MVN'` layer uses fixed value for `'eps'` parameters (1e-9). -* `'Normalize'` layer uses fixed value for `'eps'` parameters (1e-9) and is supported for zero value of `'across_spatial'` only. -* `'Pad'` layer works only with 4D tensors. +* `ScaleShift` layer is supported for zero value of `broadcast` attribute only. +* `CTCGreedyDecoder` layer works with `ctc_merge_repeated` attribute equal 1. +* `DetectionOutput` layer works with zero values of `interpolate_orientation` and `num_orient_classes` parameters only. +* `MVN` layer uses fixed value for `eps` parameters (1e-9). +* `Normalize` layer uses fixed value for `eps` parameters (1e-9) and is supported for zero value of `across_spatial` only. +* `Pad` layer works only with 4D tensors. ## Optimizations diff --git a/docs/Legal_Information.md b/docs/Legal_Information.md index 00c6cd968357e6..2f3526f2902677 100644 --- a/docs/Legal_Information.md +++ b/docs/Legal_Information.md @@ -4,9 +4,7 @@ This software and the related documents are Intel copyrighted materials, and you This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. 
Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps. The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request. Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [www.intel.com/design/literature.htm](https://www.intel.com/design/literature.htm). -Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. - -Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products. For more complete information visit [www.intel.com/benchmarks](https://www.intel.com/benchmarks). +Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex). Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. See backup for configuration details. No product or component can be absolutely secure. @@ -14,7 +12,7 @@ Your costs and results may vary. Intel technologies may require enabled hardware, software or service activation. -© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others. +© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others. ## OpenVINO™ Logo To build equity around the project, the OpenVINO logo was created for both Intel and community usage. The logo may only be used to represent the OpenVINO toolkit and offerings built using the OpenVINO toolkit. diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index 8ce80da1d1579b..cd9245c3e69646 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -12,6 +12,13 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi * .bin - Contains the weights and biases binary data. +> **TIP**: You also can work with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). +> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare +> performance of deep learning models on various Intel® architecture +> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components. +>
+> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. + ## What's New in the Model Optimizer in this Release? * Common changes: diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index 869cfa49d5e942..e938848a679444 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -108,6 +108,7 @@ Standard MXNet\* symbols: | SoftmaxActivation | No | | SoftmaxOutput | No | | SoftSign | No | +| Take | The attribute 'mode' is not supported | | Tile | No | | UpSampling | No | | Where | No | diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index cb111e9004bc4d..06ae438d9cd3c6 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -23,7 +23,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit * **Object detection models:** * SSD300-VGG16, SSD500-VGG16 * Faster-RCNN - * RefineDet (Myriad plugin only) + * RefineDet (MYRIAD plugin only) * **Face detection models:** * VGG Face diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 077e35db9d1569..7748206c36d09e 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -280,7 +280,7 @@ python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorflow_custom_operatio * Launching the Model Optimizer for Inception V1 frozen model and use custom sub-graph replacement file `transform.json` for model conversion. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md). ```sh -python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorflow_use_custom_operations_config transform.json +python3 mo_tf.py --input_model inception_v1.pb -b 1 --transformations_config transform.json ``` * Launching the Model Optimizer for Inception V1 frozen model and dump information about the graph to TensorBoard log dir `/tmp/log_dir` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md index c58de18d8479d5..6362f018132c6c 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md @@ -38,6 +38,8 @@ python3 model_inspect.py --runmode=saved_model --model_name=efficientdet-d4 --c ``` As a result the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` will be generated. +> **NOTE**: For custom trained models, specify `--hparams` flag to `config.yaml` which was used during training. + > **NOTE:** If you see an error `AttributeError: module 'tensorflow_core.python.keras.api._v2.keras.initializers' has no attribute 'variance_scaling'` apply the fix from the [patch](https://github.com/google/automl/pull/846). ### Convert EfficientDet TensorFlow Model to the IR @@ -46,7 +48,7 @@ To generate the IR of the EfficientDet TensorFlow model, run:
```sh python3 $MO_ROOT/mo.py \ --input_model savedmodeldir/efficientdet-d4_frozen.pb \ ---tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \ +--transformations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \ --input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \ --reverse_input_channels ``` @@ -56,7 +58,7 @@ EfficientDet models were trained with different input image sizes. To determine dictionary in the [hparams_config.py](https://github.com/google/automl/blob/96e1fee/efficientdet/hparams_config.py#L304) file. The attribute `image_size` specifies the shape to be specified for the model conversion. -The `tensorflow_use_custom_operations_config` command line parameter specifies the configuration json file containing hints +The `transformations_config` command line parameter specifies the configuration json file containing hints to the Model Optimizer on how to convert the model and trigger transformations implemented in the `$MO_ROOT/extensions/front/tf/AutomlEfficientDet.py`. The json file contains some parameters which must be changed if you train the model yourself and modified the `hparams_config` file or the parameters are different from the ones used for EfficientDet-D4. diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md index 0073ac2f5490ca..99748b7b18f61a 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md @@ -91,7 +91,7 @@ To generate the IR of the YOLOv3 TensorFlow model, run:
```sh python3 mo_tf.py --input_model /path/to/yolo_v3.pb ---tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/yolo_v3.json +--transformations_config $MO_ROOT/extensions/front/tf/yolo_v3.json --batch 1 ``` @@ -99,18 +99,18 @@ To generate the IR of the YOLOv3-tiny TensorFlow model, run:
```sh python3 mo_tf.py --input_model /path/to/yolo_v3_tiny.pb ---tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/yolo_v3_tiny.json +--transformations_config $MO_ROOT/extensions/front/tf/yolo_v3_tiny.json --batch 1 ``` where: * `--batch` defines shape of model input. In the example, `--batch` is equal to 1, but you can also specify other integers larger than 1. -* `--tensorflow_use_custom_operations_config` adds missing `Region` layers to the model. In the IR, the `Region` layer has name `RegionYolo`. +* `--transformations_config` adds missing `Region` layers to the model. In the IR, the `Region` layer has name `RegionYolo`. > **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md). -OpenVINO™ toolkit provides a demo that uses YOLOv3 model. For more information, refer to [Object Detection YOLO* V3 Demo, Async API Performance Showcase](@ref omz_demos_object_detection_demo_yolov3_async_README). +OpenVINO™ toolkit provides a demo that uses YOLOv3 model. For more information, refer to [Object Detection C++ Demo](@ref omz_demos_object_detection_demo_ssd_async_README). ## Convert YOLOv1 and YOLOv2 Models to the IR @@ -167,14 +167,14 @@ python3 ./mo_tf.py --input_model /.pb \ --batch 1 \ --scale 255 \ ---tensorflow_use_custom_operations_config /deployment_tools/model_optimizer/extensions/front/tf/.json +--transformations_config /deployment_tools/model_optimizer/extensions/front/tf/.json ``` where: * `--batch` defines shape of model input. In the example, `--batch` is equal to 1, but you can also specify other integers larger than 1. * `--scale` specifies scale factor that input values will be divided by. The model was trained with input values in the range `[0,1]`. OpenVINO™ toolkit samples read input images as values in `[0,255]` range, so the scale 255 must be applied. -* `--tensorflow_use_custom_operations_config` adds missing `Region` layers to the model. In the IR, the `Region` layer has name `RegionYolo`. +* `--transformations_config` adds missing `Region` layers to the model. In the IR, the `Region` layer has name `RegionYolo`. For other applicable parameters, refer to [Convert Model from TensorFlow](../Convert_Model_From_TensorFlow.md). > **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md). 
diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md index a3e6eda7756ad7..70bec8bdb4f91c 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md @@ -1,4 +1,4 @@ # Sub-Graph Replacement in the Model Optimizer {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer} -The document has been deprecated. Refer to the [Model Optimizer Extensibility](Subgraph_Replacement_Model_Optimizer.md) +The document has been deprecated. Refer to the [Model Optimizer Extensibility](Customize_Model_Optimizer.md) for the up-to-date documentation. diff --git a/docs/Optimization_notice.md b/docs/Optimization_notice.md deleted file mode 100644 index 99f71b905cc6b5..00000000000000 --- a/docs/Optimization_notice.md +++ /dev/null @@ -1,3 +0,0 @@ -# Optimization Notice {#openvino_docs_Optimization_notice} - -![Optimization_notice](img/opt-notice-en_080411.gif) \ No newline at end of file diff --git a/docs/benchmarks/performance_benchmarks.md b/docs/benchmarks/performance_benchmarks.md index 9247d63541ba28..169c83c9bea947 100644 --- a/docs/benchmarks/performance_benchmarks.md +++ b/docs/benchmarks/performance_benchmarks.md @@ -26,127 +26,174 @@ Measuring inference performance involves many variables and is extremely use-cas \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly ## Platform Configurations -Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.1. +Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.2. -Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of September 25, 2020 and may not reflect all publicly available security updates. See configuration disclosure for details. No product can be absolutely secure. +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of December 9, 2020 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure. -Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products. 
For more complete information, see [Performance Benchmark Test Disclosure](https://www.intel.com/content/www/us/en/benchmarks/benchmark.html). +Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex). Your costs and results may vary. © Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. -Optimization Notice: Intel’s compilers may or may not optimize to the same degree for non-Intel microprocessors for optimizations that are not unique to Intel microprocessors. These optimizations include SSE2, SSE3, and SSSE3 instruction sets and other optimizations. Intel does not guarantee the availability, functionality, or effectiveness of any optimization on microprocessors not manufactured by Intel. Microprocessor-dependent optimizations in this product are intended for use with Intel microprocessors. Certain optimizations not specific to Intel microarchitecture are reserved for Intel microprocessors. Please refer to the applicable product User and Reference Guides for more information regarding the specific instruction sets covered by this notice. [Notice Revision #2010804](https://software.intel.com/articles/optimization-notice). +Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products. Testing by Intel done on: see test date for each HW platform below. **CPU Inference Engines** -| | Intel® Xeon® E-2124G | Intel® Xeon® Silver 4216R | Intel® Xeon® Gold 5218T | Intel® Xeon® Platinum 8270 | -| ------------------------------- | ----------------------| ---------------------------- | ---------------------------- | ---------------------------- | -| Motherboard | ASUS* WS C246 PRO | Intel® Server Board S2600STB | Intel® Server Board S2600STB | Intel® Server Board S2600STB | -| CPU | Intel® Xeon® E-2124G CPU @ 3.40GHz | Intel® Xeon® Silver 4216R CPU @ 2.20GHz | Intel® Xeon® Gold 5218T CPU @ 2.10GHz | Intel® Xeon® Platinum 8270 CPU @ 2.70GHz | -| Hyper Threading | OFF | ON | ON | ON | -| Turbo Setting | ON | ON | ON | ON | -| Memory | 2 x 16 GB DDR4 2666MHz| 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2933MHz | -| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | -| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | -| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | Intel Corporation | Intel Corporation | -| BIOS Version | 0904 | SE5C620.86B.02.01.
0009.092820190230 | SE5C620.86B.02.01.
0009.092820190230 | SE5C620.86B.02.01.
0009.092820190230 | -| BIOS Release | April 12, 2019 | September 28, 2019 | September 28, 2019 | September 28, 2019 | -| BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
change power policy
to "performance",
save & exit | Select optimized default settings,
change power policy to "performance",
save & exit | Select optimized default settings,
change power policy to "performance",
save & exit | -| Batch size | 1 | 1 | 1 | 1 | -| Precision | INT8 | INT8 | INT8 | INT8 | -| Number of concurrent inference requests | 4 | 32 | 32 | 52 | -| Test Date | September 25, 2020 | September 25, 2020 | September 25, 2020 | September 25, 2020 | -| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | -| CPU Price on September 29, 2020, USD
Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) | +| | Intel® Xeon® E-2124G | Intel® Xeon® W1290P | Intel® Xeon® Silver 4216R | +| ------------------------------- | ---------------------- | --------------------------- | ---------------------------- | +| Motherboard | ASUS* WS C246 PRO | ASUS* WS W480-ACE | Intel® Server Board S2600STB | +| CPU | Intel® Xeon® E-2124G CPU @ 3.40GHz | Intel® Xeon® W-1290P CPU @ 3.70GHz | Intel® Xeon® Silver 4216R CPU @ 2.20GHz | +| Hyper Threading | OFF | ON | ON | +| Turbo Setting | ON | ON | ON | +| Memory | 2 x 16 GB DDR4 2666MHz | 4 x 16 GB DDR4 @ 2666MHz |12 x 32 GB DDR4 2666MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | +| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc. | Intel Corporation | +| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.
0009.092820190230 | +| BIOS Release | April 12, 2019 | May 29, 2020 | September 28, 2019 | +| BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
save & exit | Select optimized default settings,
change power policy
to "performance",
save & exit | +| Batch size | 1 | 1 | 1 +| Precision | INT8 | INT8 | INT8 +| Number of concurrent inference requests | 4 | 5 | 32 +| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 +| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | +| CPU Price on September 29, 2020, USD
Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) | **CPU Inference Engines (continue)** -| | Intel® Core™ i5-8500 | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | 11th Gen Intel® Core™ i5-1145G7E | -| -------------------- | ---------------------------------- | ----------------------------------- |--------------------------------------|-----------------------------------| -| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | Intel Corporation
internal/Reference Validation Platform | -| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | 11th Gen Intel® Core™ i5-1145G7E @ 2.60GHz | -| Hyper Threading | OFF | ON | ON | ON | -| Turbo Setting | ON | ON | ON | ON | -| Memory | 2 x 16 GB DDR4 2666MHz | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 3200MHz | -| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | -| Kernel Version | 5.3.0-24-generic | 5.0.0-23-generic | 5.0.0-23-generic | 5.8.0-05-generic | -| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation | -| BIOS Version | 2401 | F11 | 505 | TGLIFUI1.R00.3243.A04.2006302148 | -| BIOS Release | July 12, 2019 | March 13, 2019 | December 17, 2019 | June 30, 2020 | -| BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
set OS type to "other",
save & exit | Default Settings | Default Settings | -| Batch size | 1 | 1 | 1 | 1 | -| Precision | INT8 | INT8 | INT8 | INT8 | -| Number of concurrent inference requests | 3 | 4 | 24 | 4 | -| Test Date | September 25, 2020 | September 25, 2020 | September 25, 2020 | September 25, 2020 | -| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1) | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208081/intel-core-i5-1145g7e-processor-8m-cache-up-to-4-10-ghz.html) | -| CPU Price on September 29, 2020, USD
Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [309](https://mysamples.intel.com/SAM_U_Product/ProductDetail.aspx?InputMMID=99A3D1&RequestID=0&ProductID=1213750) | +| | Intel® Xeon® Gold 5218T | Intel® Xeon® Platinum 8270 | +| ------------------------------- | ---------------------------- | ---------------------------- | +| Motherboard | Intel® Server Board S2600STB | Intel® Server Board S2600STB | +| CPU | Intel® Xeon® Gold 5218T CPU @ 2.10GHz | Intel® Xeon® Platinum 8270 CPU @ 2.70GHz | +| Hyper Threading | ON | ON | +| Turbo Setting | ON | ON | +| Memory | 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2933MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | +| BIOS Vendor | Intel Corporation | Intel Corporation | +| BIOS Version | SE5C620.86B.02.01.
0009.092820190230 | SE5C620.86B.02.01.
0009.092820190230 | +| BIOS Release | September 28, 2019 | September 28, 2019 | +| BIOS Settings | Select optimized default settings,
change power policy to "performance",
save & exit | Select optimized default settings,
change power policy to "performance",
save & exit | +| Batch size | 1 | 1 | +| Precision | INT8 | INT8 | +| Number of concurrent inference requests |32 | 52 | +| Test Date | December 9, 2020 | December 9, 2020 | +| Power dissipation, TDP in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | +| CPU Price on September 29, 2020, USD
Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) | + + +**CPU Inference Engines (continue)** + +| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | Intel® Core™ i9-10900TE
(iEi Flex BX210AI)| 11th Gen Intel® Core™ i7-1185G7 | +| -------------------- | ----------------------------------- |--------------------------------------| ---------------------------------------------|---------------------------------| +| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | iEi / B595 | Intel Corporation
internal/Reference
Validation Platform | +| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | Intel® Core™ i9-10900TE CPU @ 1.80GHz | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz | +| Hyper Threading | ON | ON | ON | ON | +| Turbo Setting | ON | ON | ON | ON | +| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 @ 2400MHz | 2 x 8 GB DDR4 3200MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.8.0-05-generic | 5.8.0-05-generic | +| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation | +| BIOS Version | F11 | 505 | Z667AR10 | TGLSFWI1.R00.3425.
A00.2010162309 | +| BIOS Release | March 13, 2019 | December 17, 2019 | July 15, 2020 | October 16, 2020 | +| BIOS Settings | Select optimized default settings,
set OS type to "other",
save & exit | Default Settings | Default Settings | Default Settings | +| Batch size | 1 | 1 | 1 | 1 | +| Precision | INT8 | INT8 | INT8 | INT8 | +| Number of concurrent inference requests |4 | 24 | 5 | 4 | +| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 | December 9, 2020 | +| Power dissipation, TDP in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [35](https://ark.intel.com/content/www/us/en/ark/products/203901/intel-core-i9-10900te-processor-20m-cache-up-to-4-60-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-1) | +| CPU Price on September 29, 2020, USD
Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [444](https://ark.intel.com/content/www/us/en/ark/products/203901/intel-core-i9-10900te-processor-20m-cache-up-to-4-60-ghz.html) | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-0) | + + +**CPU Inference Engines (continue)** + +| | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE | Intel® Core™ i5-10500TE
(iEi Flex-BX210AI)| +| -------------------- | ---------------------------------- | ----------------------------------- |-------------------------------------- | +| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX | iEi / B595 | +| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz | +| Hyper Threading | OFF | ON | ON | +| Turbo Setting | ON | ON | ON | +| Memory | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz | 1 x 8 GB DDR4 @ 2400MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | +| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | +| BIOS Version | 2401 | F3 | Z667AR10 | +| BIOS Release | July 12, 2019 | March 25, 2020 | July 17, 2020 | +| BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
set OS type to "other",
save & exit | Default Settings | +| Batch size | 1 | 1 | 1 | +| Precision | INT8 | INT8 | INT8 | +| Number of concurrent inference requests | 3 | 4 | 4 | +| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 | +| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | +| CPU Price on September 29, 2020, USD
Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | + **CPU Inference Engines (continue)** @@ -166,7 +213,7 @@ Testing by Intel done on: see test date for each HW platform below. | Batch size | 1 | 1 | | Precision | INT8 | INT8 | | Number of concurrent inference requests | 4 | 4 | -| Test Date | September 25, 2020 | September 25, 2020 | +| Test Date | December 9, 2020 | December 9, 2020 | | Power dissipation, TDP in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)| | CPU Price on September 29, 2020, USD
Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) | @@ -174,7 +221,7 @@ Testing by Intel done on: see test date for each HW platform below. **Accelerator Inference Engines** -| | Intel® Neural Compute Stick 2 | Intel® Vision Accelerator Design
with Intel® Movidius™ VPUs (Uzel* UI-AR8) | +| | Intel® Neural Compute Stick 2 | Intel® Vision Accelerator Design
with Intel® Movidius™ VPUs (Mustang-V100-MX8) | | --------------------------------------- | ------------------------------------- | ------------------------------------- | | VPU | 1 X Intel® Movidius™ Myriad™ X MA2485 | 8 X Intel® Movidius™ Myriad™ X MA2485 | | Connection | USB 2.0/3.0 | PCIe X4 | @@ -182,7 +229,7 @@ Testing by Intel done on: see test date for each HW platform below. | Precision | FP16 | FP16 | | Number of concurrent inference requests | 4 | 32 | | Power dissipation, TDP in Watt | 2.5 | [30](https://www.mouser.com/ProductDetail/IEI/MUSTANG-V100-MX8-R10?qs=u16ybLDytRaZtiUUvsd36w%3D%3D) | -| CPU Price, USD
Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from September 29, 2020) | [768](https://www.mouser.com/ProductDetail/IEI/MUSTANG-V100-MX8-R10?qs=u16ybLDytRaZtiUUvsd36w%3D%3D) (from May 15, 2020) | +| CPU Price, USD
Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from December 9, 2020) | [214](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) (from December 9, 2020) | | Host Computer | Intel® Core™ i7 | Intel® Core™ i5 | | Motherboard | ASUS* Z370-A II | Uzelinfo* / US-E1300 | | CPU | Intel® Core™ i7-8700 CPU @ 3.20GHz | Intel® Core™ i5-6600 CPU @ 3.30GHz | @@ -194,9 +241,9 @@ Testing by Intel done on: see test date for each HW platform below. | BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | | BIOS Version | 411 | 5.12 | | BIOS Release | September 21, 2018 | September 21, 2018 | -| Test Date | September 25, 2020 | September 25, 2020 | +| Test Date | December 9, 2020 | December 9, 2020 | -Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.1.html) +Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.2.html) \htmlonly [inline SVG diagram content omitted: G-API face beautification pipeline graph with nodes Input, Face detector, Landmarks detector, Generate BG/sharp/blur masks, Bilateral filter, Unsharp mask, Output, and a "For each face" subgraph] diff --git a/docs/img/gapi_face_beautification_example.jpg b/docs/img/gapi_face_beautification_example.jpg new file mode 100644 index 00000000000000..eb3df6b58785bf --- /dev/null +++ b/docs/img/gapi_face_beautification_example.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb32d3db8768ff157daeff999cc7f4361d2bca866ed6dc95b8f78d8cc62ae208 +size 176525 diff --git a/docs/img/gapi_kernel_implementation_hierarchy.png b/docs/img/gapi_kernel_implementation_hierarchy.png new file mode 100644 index 00000000000000..f910caa840d191 --- /dev/null +++ b/docs/img/gapi_kernel_implementation_hierarchy.png @@ -0,0 +1,3 @@ +version
https://git-lfs.github.com/spec/v1 +oid sha256:f291422f562825d4c5eee718b7c22e472b02a5a0a9c0be01d59b6b7cd8d756b1 +size 14603 diff --git a/docs/img/gapi_programming_model.png b/docs/img/gapi_programming_model.png new file mode 100644 index 00000000000000..2ac10dcc82c13f --- /dev/null +++ b/docs/img/gapi_programming_model.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925f70ede92d71e16733d78e003f62cd8bfdee0790bddbf2b7ce4fc8ef3f44bf +size 171518 diff --git a/docs/img/int8vsfp32.png b/docs/img/int8vsfp32.png index a47ffa2f1c96ff..b4889ea2252a97 100644 --- a/docs/img/int8vsfp32.png +++ b/docs/img/int8vsfp32.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:304869bcbea000f6dbf46dee7900ff01aa61a75a3787969cc307f2f54d57263c -size 32185 +oid sha256:0109b9cbc2908f786f6593de335c725f8ce5c800f37a7d79369408cc47eb8471 +size 25725 diff --git a/docs/install_guides/PAC_Configure_2018R5.md b/docs/install_guides/PAC_Configure_2018R5.md index 8177adb315d2b5..1378c0c6f2cb09 100644 --- a/docs/install_guides/PAC_Configure_2018R5.md +++ b/docs/install_guides/PAC_Configure_2018R5.md @@ -236,11 +236,7 @@ classification_sample_async -m squeezenet1.1.xml -i $IE_INSTALL/demo/car.png -d classification_sample_async -m squeezenet1.1.xml -i $IE_INSTALL/demo/car.png -d HETERO:FPGA,CPU -ni 100 ``` -Congratulations, You are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and are other resources are provided below. - -## Hello World Face Detection Tutorial - -Use the [Intel® Distribution of OpenVINO™ toolkit with FPGA Hello World Face Detection Exercise](https://github.com/fritzboyle/openvino-with-fpga-hello-world-face-detection) to learn more about how the software and hardware work together. +Congratulations, You are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. ## Additional Resources diff --git a/docs/install_guides/PAC_Configure_2019RX.md b/docs/install_guides/PAC_Configure_2019RX.md index 867215540e4881..5e43876ec20e00 100644 --- a/docs/install_guides/PAC_Configure_2019RX.md +++ b/docs/install_guides/PAC_Configure_2019RX.md @@ -237,12 +237,7 @@ classification_sample_async -m squeezenet1.1.xml -i $IE_INSTALL/demo/car.png classification_sample_async -m squeezenet1.1.xml -i $IE_INSTALL/demo/car.png -d HETERO:FPGA,CPU ``` -Congratulations, You are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and are other resources are provided below. - -## Hello World Face Detection Tutorial - -Use the [Intel® Distribution of OpenVINO™ toolkit with FPGA Hello World Face Detection Exercise](https://github.com/fritzboyle/openvino-with-fpga-hello-world-face-detection) to learn more about how the software and hardware work together. - +Congratulations, You are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. 
## Additional Resources Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) diff --git a/docs/install_guides/VisionAcceleratorFPGA_Configure_2018R5.md b/docs/install_guides/VisionAcceleratorFPGA_Configure_2018R5.md index c0082ef86f62a2..328c824fa35967 100644 --- a/docs/install_guides/VisionAcceleratorFPGA_Configure_2018R5.md +++ b/docs/install_guides/VisionAcceleratorFPGA_Configure_2018R5.md @@ -319,11 +319,7 @@ The throughput on FPGA is listed and may show a lower FPS. This is due to the in ./classification_sample_async -i car.png -m ~/squeezenet1.1_FP16/squeezenet1.1.xml -d HETERO:FPGA,CPU -ni 100 ``` -Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and are other resources are provided below. - -## Hello World Face Detection Tutorial - -Use the [Intel® Distribution of OpenVINO™ toolkit with FPGA Hello World Face Detection Exercise](https://github.com/fritzboyle/openvino-with-fpga-hello-world-face-detection) to learn more about how the software and hardware work together. +Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. ## Additional Resources diff --git a/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R1.md b/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R1.md index 640f5387c38fa7..8de131e8c45161 100644 --- a/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R1.md +++ b/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R1.md @@ -270,11 +270,7 @@ The throughput on FPGA is listed and may show a lower FPS. This is due to the in ./classification_sample_async -i car.png -m ~/squeezenet1.1_FP16/squeezenet1.1.xml -d HETERO:FPGA,CPU -ni 100 ``` -Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and are other resources are provided below. - -## Hello World Face Detection Tutorial - -Use the [Intel® Distribution of OpenVINO™ toolkit with FPGA Hello World Face Detection Exercise](https://github.com/fritzboyle/openvino-with-fpga-hello-world-face-detection) to learn more about how the software and hardware work together. +Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. ## Additional Resources diff --git a/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R3.md b/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R3.md index 369555f35f2f8a..06d8ebbc86939a 100644 --- a/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R3.md +++ b/docs/install_guides/VisionAcceleratorFPGA_Configure_2019R3.md @@ -270,11 +270,7 @@ Note the CPU throughput in Frames Per Second (FPS). This tells you how quickly t ``` The throughput on FPGA is listed and may show a lower FPS. This may be due to the initialization time. To account for that, increase the number of iterations or batch size when deploying to get a better sense of the speed the FPGA can run inference at. -Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and are other resources are provided below. 
- -## Hello World Face Detection Tutorial - -Use the [Intel® Distribution of OpenVINO™ toolkit with FPGA Hello World Face Detection Exercise](https://github.com/fritzboyle/openvino-with-fpga-hello-world-face-detection) to learn more about how the software and hardware work together. +Congratulations, you are done with the Intel® Distribution of OpenVINO™ toolkit installation for FPGA. ## Additional Resources diff --git a/docs/install_guides/installing-openvino-apt.md b/docs/install_guides/installing-openvino-apt.md index 08249588623ac6..812c6195f2c9a5 100644 --- a/docs/install_guides/installing-openvino-apt.md +++ b/docs/install_guides/installing-openvino-apt.md @@ -129,6 +129,5 @@ sudo apt autoremove intel-openvino--ubuntu-.< - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). -- For information on Inference Engine Tutorials, see the [Inference Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). diff --git a/docs/install_guides/installing-openvino-conda.md b/docs/install_guides/installing-openvino-conda.md index c491c862a682dc..a53997c4901fb5 100644 --- a/docs/install_guides/installing-openvino-conda.md +++ b/docs/install_guides/installing-openvino-conda.md @@ -49,7 +49,7 @@ Now you can start to develop and run your application. ## Known Issues and Limitations - You cannot use Python bindings included in Intel® Distribution of OpenVINO™ toolkit with [Anaconda* distribution](https://www.anaconda.com/products/individual/) -- You cannot use Python OpenVINO™ bindings included in Anaconda* package with official [Python distribution](https://https://www.python.org/). +- You cannot use Python OpenVINO™ bindings included in Anaconda* package with official [Python distribution](https://www.python.org/). ## Additional Resources @@ -59,6 +59,5 @@ Now you can start to develop and run your application. - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). -- For information on Inference Engine Tutorials, see the [Inference Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic). 
- Intel® Distribution of OpenVINO™ toolkit Anaconda* home page: [https://anaconda.org/intel/openvino-ie4py](https://anaconda.org/intel/openvino-ie4py) diff --git a/docs/install_guides/installing-openvino-docker-linux.md b/docs/install_guides/installing-openvino-docker-linux.md index 9d73e742d8aaae..ff5acfbe0635b2 100644 --- a/docs/install_guides/installing-openvino-docker-linux.md +++ b/docs/install_guides/installing-openvino-docker-linux.md @@ -59,7 +59,7 @@ RUN apt-get update && \ curl -L "https://github.com/intel/compute-runtime/releases/download/19.41.14441/intel-igc-core_1.0.2597_amd64.deb" --output "intel-igc-core_1.0.2597_amd64.deb" && \ curl -L "https://github.com/intel/compute-runtime/releases/download/19.41.14441/intel-igc-opencl_1.0.2597_amd64.deb" --output "intel-igc-opencl_1.0.2597_amd64.deb" && \ curl -L "https://github.com/intel/compute-runtime/releases/download/19.41.14441/intel-opencl_19.41.14441_amd64.deb" --output "intel-opencl_19.41.14441_amd64.deb" && \ - curl -L "https://github.com/intel/compute-runtime/releases/download/19.41.14441/intel-ocloc_19.04.12237_amd64.deb" --output "intel-ocloc_19.04.12237_amd64.deb" && \ + curl -L "https://github.com/intel/compute-runtime/releases/download/19.41.14441/intel-ocloc_19.41.14441_amd64.deb" --output "intel-ocloc_19.04.12237_amd64.deb" && \ dpkg -i /tmp/opencl/*.deb && \ ldconfig && \ rm /tmp/opencl diff --git a/docs/install_guides/installing-openvino-linux.md b/docs/install_guides/installing-openvino-linux.md index 9ceed341bda9b8..df4c0413152a97 100644 --- a/docs/install_guides/installing-openvino-linux.md +++ b/docs/install_guides/installing-openvino-linux.md @@ -31,6 +31,19 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*: | [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo). | | Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). | +**Could Be Optionally Installed** + +[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture +configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components: +* [Model Downloader](@ref omz_tools_downloader_README) +* [Intel® Open Model Zoo](@ref omz_models_intel_index) +* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +* [Post-training Optimization Tool](@ref pot_README) +* [Accuracy Checker](@ref omz_tools_accuracy_checker_README) +* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md) + +Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. + ## System Requirements **Hardware** @@ -65,14 +78,12 @@ This guide provides step-by-step instructions on how to install the Intel® Dist 2. Install External software dependencies 3. 
Set the OpenVINO™ Environment Variables: Optional Update to .bashrc. 4. Configure the Model Optimizer -5. Run the Verification Scripts to Verify Installation and Compile Samples -6. Steps for Intel® Processor Graphics (GPU) -7. Steps for Intel® Neural Compute Stick 2 -8. Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPU
+5. Steps for Intel® Processor Graphics (GPU) +6. Steps for Intel® Neural Compute Stick 2 +7. Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPU
After installing your Intel® Movidius™ VPU, you will return to this guide to complete OpenVINO™ installation. -9. Run a Sample Application -10. Uninstall the Intel® Distribution of OpenVINO™ Toolkit. -11. Use the Face Detection Tutorial +8. Get Started with Code Samples and Demo Applications +9. Steps to uninstall the Intel® Distribution of OpenVINO™ Toolkit. ## Install the Intel® Distribution of OpenVINO™ Toolkit Core Components @@ -98,15 +109,10 @@ cd l_openvino_toolkit_p_ ``` If you have a previous version of the Intel Distribution of OpenVINO toolkit installed, rename or delete these two directories: -- `~/inference_engine_samples_build` -- `~/openvino_models` - - **Installation Notes:** - - Choose an installation option and run the related script as root. - - You can use either a GUI installation wizard or command line instructions (CLI). - - Screenshots are provided for the GUI, but not for CLI. The following information also applies to CLI and will be helpful to your installation where you will be presented with the same choices and tasks. - -5. Choose your installation option: + - `~/inference_engine_samples_build` + - `~/openvino_models` +5. Choose your installation option and run the related script as root to use either a GUI installation wizard or command line instructions (CLI).
+ Screenshots are provided for the GUI, but not for CLI. The following information also applies to CLI and will be helpful to your installation where you will be presented with the same choices and tasks. - **Option 1:** GUI Installation Wizard: ```sh sudo ./install_GUI.sh @@ -120,27 +126,22 @@ sudo ./install.sh sudo sed -i 's/decline/accept/g' silent.cfg sudo ./install.sh -s silent.cfg ``` -You can select which OpenVINO components will be installed by modifying the `COMPONENTS` parameter in the `silent.cfg` file. For example, to install only CPU runtime for the Inference Engine, set -`COMPONENTS=intel-openvino-ie-rt-cpu__x86_64` in `silent.cfg`. -To get a full list of available components for installation, run the `./install.sh --list_components` command from the unpacked OpenVINO™ toolkit package. - -6. Follow the instructions on your screen. Watch for informational -messages such as the following in case you must complete additional -steps: -![](../img/openvino-install-linux-01.png) - + You can select which OpenVINO components will be installed by modifying the `COMPONENTS` parameter in the `silent.cfg` file. For example, to install only CPU runtime for the Inference Engine, set `COMPONENTS=intel-openvino-ie-rt-cpu__x86_64` in `silent.cfg`. To get a full list of available components for installation, run the `./install.sh --list_components` command from the unpacked OpenVINO™ toolkit package. +6. Follow the instructions on your screen. Watch for informational messages such as the following in case you must complete additional steps: + ![](../img/openvino-install-linux-01.png) 7. If you select the default options, the **Installation summary** GUI screen looks like this: -![](../img/openvino-install-linux-02.png) -**Optional:** You can choose **Customize** to change the installation directory or the components you want to install: -![](../img/openvino-install-linux-03.png) -By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as ``: - - For root or administrator: `/opt/intel/openvino_/` - - For regular users: `/home//intel/openvino_/` -For simplicity, a symbolic link to the latest installation is also created: `/opt/intel/openvino_2021/`. + ![](../img/openvino-install-linux-02.png) + By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as ``: + * For root or administrator: `/opt/intel/openvino_/` + * For regular users: `/home//intel/openvino_/` + For simplicity, a symbolic link to the latest installation is also created: `/opt/intel/openvino_2021/`. + +8. **Optional**: You can choose **Customize** to change the installation directory or the components you want to install: +> **NOTE**: If there is an OpenVINO™ toolkit version previously installed on your system, the installer will use the same destination directory for next installations. If you want to install a newer version to a different directory, you need to uninstall the previously installed versions. + ![](../img/openvino-install-linux-03.png) > **NOTE**: The Intel® Media SDK component is always installed in the `/opt/intel/mediasdk` directory regardless of the OpenVINO installation path chosen. -8. A Complete screen indicates that the core components have been installed: - -![](../img/openvino-install-linux-04.png) +9. A Complete screen indicates that the core components have been installed: + ![](../img/openvino-install-linux-04.png) The first core components are installed. 
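For the CLI silent installation above, the component selection can be scripted as well. A minimal sketch, assuming `silent.cfg` already contains a `COMPONENTS` line (the component name is the CPU-runtime example given in this guide; the full list comes from `./install.sh --list_components`):

```sh
# Accept the EULA non-interactively, as in the silent-install instructions above
sudo sed -i 's/decline/accept/g' silent.cfg
# Hypothetical edit: restrict the install to the Inference Engine CPU runtime only;
# assumes silent.cfg already ships with a COMPONENTS line, otherwise append one
sudo sed -i 's/^COMPONENTS=.*/COMPONENTS=intel-openvino-ie-rt-cpu__x86_64/' silent.cfg
sudo ./install.sh -s silent.cfg
```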
Continue to the next section to install additional dependencies. @@ -266,51 +267,15 @@ cd /opt/intel/openvino_2021/deployment_tools/model_optimizer/install_prerequisit ``` The Model Optimizer is configured for one or more frameworks. -You are ready to compile the samples by running the verification scripts. - -## Run the Verification Scripts to Verify Installation - -> **IMPORTANT**: This section is required. In addition to confirming your installation was successful, demo scripts perform other steps, such as setting up your computer to use the Inference Engine samples. +You have completed all required installation, configuration and build steps in this guide to use your CPU to work with your trained models. -To verify the installation and compile two samples, use the steps below to run the verification applications provided with the product on the CPU. - -> **NOTE:** To run the demo applications on Intel® Processor Graphics or Intel® Neural Compute Stick 2 devices, make sure you first completed the additional Steps for Intel® Processor Graphics (GPU) or Steps for Intel® Neural Compute Stick 2. - -1. Go to the **Inference Engine demo** directory: -```sh -cd /opt/intel/openvino_2021/deployment_tools/demo -``` - -2. Run the **Image Classification verification script**: -```sh -./demo_squeezenet_download_convert_run.sh -``` - This verification script downloads a SqueezeNet model, uses the Model Optimizer to convert the model to the .bin and .xml Intermediate Representation (IR) files. The Inference Engine requires this model conversion so it can use the IR as input and achieve optimum performance on Intel hardware.
- This verification script builds the [Image Classification Sample Async](../../inference-engine/samples/classification_sample_async/README.md) application and run it with the `car.png` image located in the demo directory. When the verification script completes, you will have the label and confidence for the top-10 categories: - ![](../img/image_classification_script_output_lnx.png) - -3. Run the **Inference Pipeline verification script**: -```sh -./demo_security_barrier_camera.sh -``` - This script downloads three pre-trained model IRs, builds the [Security Barrier Camera Demo](@ref omz_demos_security_barrier_camera_demo_README) application, and runs it with the downloaded models and the `car_1.bmp` image from the `demo` directory to show an inference pipeline. The verification script uses vehicle recognition in which vehicle attributes build on each other to narrow in on a specific attribute.
- First, an object is identified as a vehicle. This identification is used as input to the next model, which identifies specific vehicle attributes, including the license plate. Finally, the attributes identified as the license plate are used as input to the third model, which recognizes specific characters in the license plate.
- When the verification script completes, you will see an image that displays the resulting frame with detections rendered as bounding boxes, and text: - ![](../img/inference_pipeline_script_lnx.png) - -4. Close the image viewer window to complete the verification script. - - -To learn about the verification scripts, see the `README.txt` file in `/opt/intel/openvino_2021/deployment_tools/demo`. - -For a description of the Intel Distribution of OpenVINO™ pre-trained object detection and object recognition models, see [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index). - -You have completed all required installation, configuration and build steps in this guide to use your CPU to work with your trained models. -To use other hardware, see; +To enable inference on other hardware, see: - Steps for Intel® Processor Graphics (GPU) - Steps for Intel® Neural Compute Stick 2 - Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
+Or proceed to the Get Started to get started with running code samples and demo applications. + ## Steps for Intel® Processor Graphics (GPU) The steps in this section are required only if you want to enable the toolkit components to use processor graphics (GPU) on your system. @@ -323,11 +288,10 @@ cd /opt/intel/openvino_2021/install_dependencies/ ```sh sudo -E su ``` -3. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. Run the installation script: +3. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package, to install it, make sure you have the internet connection and run the installation script: ```sh ./install_NEO_OCL_driver.sh ``` - The drivers are not included in the package and the script downloads them. Make sure you have the internet connection for this step.
The script compares the driver version on the system to the current version. If the driver version on the system is higher or equal to the current version, the script does not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower and installs the current version with your permission: ![](../img/NEO_check_agreement.png) @@ -335,9 +299,13 @@ not install a new driver. If the version of the driver is lower than the current ```sh Add OpenCL user to video group ``` - Ignore this suggestion and continue. + Ignore this suggestion and continue.
You can also find the most recent version of the driver, installation procedure and other information in the [https://github.com/intel/compute-runtime/](https://github.com/intel/compute-runtime/) repository. + 4. **Optional** Install header files to allow compiling a new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git). +You've completed all required configuration steps to perform inference on processor graphics. +Proceed to the Get Started to get started with running code samples and demo applications. + ## Steps for Intel® Neural Compute Stick 2 These steps are only required if you want to perform inference on Intel® Movidius™ NCS powered by the Intel® Movidius™ Myriad™ 2 VPU or Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X VPU. See also the [Get Started page for Intel® Neural Compute Stick 2:](https://software.intel.com/en-us/neural-compute-stick/get-started) @@ -348,20 +316,23 @@ sudo usermod -a -G users "$(whoami)" ``` Log out and log in for it to take effect. 2. To perform inference on Intel® Neural Compute Stick 2, install the USB rules as follows: -```sh -sudo cp /opt/intel/openvino_2021/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ -``` -```sh -sudo udevadm control --reload-rules -``` -```sh -sudo udevadm trigger -``` -```sh -sudo ldconfig -``` + ```sh + sudo cp /opt/intel/openvino_2021/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ + ``` + ```sh + sudo udevadm control --reload-rules + ``` + ```sh + sudo udevadm trigger + ``` + ```sh + sudo ldconfig + ``` > **NOTE**: You may need to reboot your machine for this to take effect. +You've completed all required configuration steps to perform inference on Intel® Neural Compute Stick 2. +Proceed to the Get Started to get started with running code samples and demo applications. + ## Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs To install and configure your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see the [Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Configuration Guide](installing-openvino-linux-ivad-vpu.md). @@ -385,61 +356,14 @@ cd /opt/intel/openvino_2021/deployment_tools/demo ./demo_security_barrier_camera.sh -d HDDL ``` -## Run a Sample Application - -> **IMPORTANT**: This section requires that you have [Run the Verification Scripts to Verify Installation](#run-the-demos). This script builds the Image Classification sample application and downloads and converts the required Caffe* Squeezenet model to an IR. - -In this section you will run the Image Classification sample application, with the Caffe* Squeezenet1.1 model on three types of Intel® hardware: CPU, GPU and VPUs. - -Image Classification sample application binary file was automatically built and the FP16 model IR files are created when you [Ran the Image Classification Verification Script](#run-the-image-classification-verification-script). - -The Image Classification sample application binary file located in the `/home//inference_engine_samples_build/intel64/Release` directory. -The Caffe* Squeezenet model IR files (`.bin` and `.xml`) are located in the `/home//openvino_models/ir/public/squeezenet1.1/FP16/` directory. - -> **NOTE**: If you installed the Intel® Distribution of OpenVINO™ to the non-default install directory, replace `/opt/intel` with the directory in which you installed the software. - -To run the sample application: - -1. 
Set up environment variables: -```sh -source /opt/intel/openvino_2021/bin/setupvars.sh -``` -2. Go to the samples build directory: -```sh -cd ~/inference_engine_samples_build/intel64/Release -``` -3. Run the sample executable with specifying the `car.png` file from the `demo` directory as an input image, the IR of your FP16 model and a plugin for a hardware device to perform inference on. -> **NOTE**: Running the sample application on hardware other than CPU requires performing [additional hardware configuration steps](#optional-steps). - - - **For CPU**: - ```sh - ./classification_sample_async -i /opt/intel/openvino_2021/deployment_tools/demo/car.png -m ~/openvino_models/ir/public/squeezenet1.1/FP16/squeezenet1.1.xml -d CPU - ``` - - - **For GPU**: - ```sh - ./classification_sample_async -i /opt/intel/openvino_2021/deployment_tools/demo/car.png -m ~/openvino_models/ir/public/squeezenet1.1/FP16/squeezenet1.1.xml -d GPU - ``` - - - **For MYRIAD**: - > **NOTE**: Running inference on Intel® Neural Compute Stick 2 with the MYRIAD plugin requires performing [additional hardware configuration steps](#additional-NCS-steps). - ```sh - ./classification_sample_async -i /opt/intel/openvino_2021/deployment_tools/demo/car.png -m ~/openvino_models/ir/public/squeezenet1.1/FP16/squeezenet1.1.xml -d MYRIAD - ``` - - - **For HDDL**: - > **NOTE**: Running inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs with the HDDL plugin requires performing [additional hardware configuration steps](installing-openvino-linux-ivad-vpu.md) - ```sh - ./classification_sample_async -i /opt/intel/openvino_2021/deployment_tools/demo/car.png -m ~/openvino_models/ir/public/squeezenet1.1/FP16/squeezenet1.1.xml -d HDDL - ``` - -For information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). - -Congratulations, you have finished the installation of the Intel® Distribution of OpenVINO™ toolkit for Linux*. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and other resources are provided below. +You've completed all required configuration steps to perform inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. +Proceed to the Get Started to get started with running code samples and demo applications. -## Hello World Face Detection Tutorial +## Get Started -See the [OpenVINO™ Hello World Face Detection Exercise](https://github.com/intel-iot-devkit/inference-tutorials-generic). +Now you are ready to get started. To continue, see the following pages: +* [OpenVINO™ Toolkit Overview](../index.md) +* [Get Started Guide for Linux](../get_started/get_started_linux.md) to learn the basic OpenVINO™ toolkit workflow and run code samples and demo applications with pre-trained models on different inference devices. ## Uninstall the Intel® Distribution of OpenVINO™ Toolkit Choose one of the options provided below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system. @@ -492,7 +416,6 @@ trusted-host = mirrors.aliyun.com - [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). 
- For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index) -- For information on Inference Engine Tutorials, see the [Inference Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic) - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). To learn more about converting models, go to: diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/installing-openvino-macos.md index a3e081e7c8e212..9489d3a3732a69 100644 --- a/docs/install_guides/installing-openvino-macos.md +++ b/docs/install_guides/installing-openvino-macos.md @@ -31,6 +31,19 @@ The following components are installed by default: | Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and other | | [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) | +**Could Be Optionally Installed** + +[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture +configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components: +* [Model Downloader](@ref omz_tools_downloader_README) +* [Intel® Open Model Zoo](@ref omz_models_intel_index) +* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +* [Post-training Optimization Tool](@ref pot_README) +* [Accuracy Checker](@ref omz_tools_accuracy_checker_README) +* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md) + +Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. + ## Development and Target Platform The development and target platforms have the same requirements, but you can select different components during the installation, based on your intended use. @@ -64,9 +77,9 @@ The following steps will be covered: 1. Install the Intel® Distribution of OpenVINO™ Toolkit . 2. Set the OpenVINO environment variables and (optional) Update to .bash_profile. -4. Configure the Model Optimizer. -5. Run verification scripts to verify installation and compile samples. -6. Uninstall the Intel® Distribution of OpenVINO™ Toolkit. +3. Configure the Model Optimizer. +4. Get Started with Code Samples and Demo Applications. +5. Uninstall the Intel® Distribution of OpenVINO™ Toolkit. ## Install the Intel® Distribution of OpenVINO™ toolkit Core Components @@ -93,7 +106,7 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_` and autom ![](../img/openvino-install-macos-01.png) - The default installation directory path depends on the privileges you choose for the installation. + The default installation directory path depends on the privileges you choose for the installation. 5. Click **Next** and follow the instructions on your screen. @@ -104,18 +117,16 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_` and autom 8. 
The **Installation summary** screen shows you the default component set to install: ![](../img/openvino-install-macos-03.png) + By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as ``: - By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as ``: - -* For root or administrator: `/opt/intel/openvino_/` -* For regular users: `/home//intel/openvino_/` - -For simplicity, a symbolic link to the latest installation is also created: `/home//intel/openvino_2021/`. + * For root or administrator: `/opt/intel/openvino_/` + * For regular users: `/home//intel/openvino_/` + For simplicity, a symbolic link to the latest installation is also created: `/home//intel/openvino_2021/`. 9. If needed, click **Customize** to change the installation directory or the components you want to install: - ![](../img/openvino-install-macos-04.png) - - Click **Next** to save the installation options and show the Installation summary screen. + ![](../img/openvino-install-macos-04.png) + > **NOTE**: If there is an OpenVINO™ toolkit version previously installed on your system, the installer will use the same destination directory for next installations. If you want to install a newer version to a different directory, you need to uninstall the previously installed versions. + Click **Next** to save the installation options and show the Installation summary screen. 10. On the **Installation summary** screen, press **Install** to begin the installation. @@ -228,55 +239,11 @@ Configure individual frameworks separately **ONLY** if you did not select **Opti The Model Optimizer is configured for one or more frameworks. -You are ready to verify the installation by running the verification scripts. - -## Run the Verification Scripts to Verify Installation and Compile Samples - -> **NOTES**: -> - The steps shown here assume you used the default installation directory to install the OpenVINO toolkit. If you installed the software to a directory other than `/opt/intel/`, update the directory path with the location where you installed the toolkit. -> - If you installed the product as a root user, you must switch to the root mode before you continue: `sudo -i`. - -To verify the installation and compile two Inference Engine samples, run the verification applications provided with the product on the CPU: - -### Run the Image Classification Verification Script - -1. Go to the **Inference Engine demo** directory: - ```sh - cd /opt/intel/openvino_2021/deployment_tools/demo - ``` - -2. Run the **Image Classification verification script**: - ```sh - ./demo_squeezenet_download_convert_run.sh - ``` - -The Image Classification verification script downloads a public SqueezeNet Caffe* model and runs the Model Optimizer to convert the model to `.bin` and `.xml` Intermediate Representation (IR) files. The Inference Engine requires this model conversion so it can use the IR as input and achieve optimum performance on Intel hardware. - -This verification script creates the directory `/home//inference_engine_samples/`, builds the [Image Classification Sample](../../inference-engine/samples/classification_sample_async/README.md) application and runs with the model IR and `car.png` image located in the `demo` directory. 
When the verification script completes, you will have the label and confidence for the top-10 categories: +You have completed all required installation, configuration and build steps in this guide to use your CPU to work with your trained models. -![](../img/image_classification_script_output_lnx.png) +To enable inference on Intel® Neural Compute Stick 2, see the Steps for Intel® Neural Compute Stick 2. -For a brief description of the Intermediate Representation `.bin` and `.xml` files, see [Configuring the Model Optimizer](#configure-the-model-optimizer). - -This script is complete. Continue to the next section to run the Inference Pipeline verification script. - -### Run the Inference Pipeline Verification Script - -While still in `/opt/intel/openvino_2021/deployment_tools/demo/`, run the Inference Pipeline verification script: - ```sh - ./demo_security_barrier_camera.sh - ``` - -This verification script downloads three pre-trained model IRs, builds the [Security Barrier Camera Demo](@ref omz_demos_security_barrier_camera_demo_README) application and runs it with the downloaded models and the `car_1.bmp` image from the `demo` directory to show an inference pipeline. The verification script uses vehicle recognition in which vehicle attributes build on each other to narrow in on a specific attribute. - -First, an object is identified as a vehicle. This identification is used as input to the next model, which identifies specific vehicle attributes, including the license plate. Finally, the attributes identified as the license plate are used as input to the third model, which recognizes specific characters in the license plate. - -When the verification script completes, you will see an image that displays the resulting frame with detections rendered as bounding boxes, and text: -![](../img/inference_pipeline_script_mac.png) - -Close the image viewer screen to end the demo. - -**Congratulations**, you have completed the Intel® Distribution of OpenVINO™ 2020.1 installation for macOS. To learn more about what you can do with the Intel® Distribution of OpenVINO™ toolkit, see the additional resources provided below. +Or proceed to the Get Started to get started with running code samples and demo applications. ## Steps for Intel® Neural Compute Stick 2 @@ -291,9 +258,14 @@ For example, to install the `libusb` library using Homebrew\*, use the following brew install libusb ``` -## Hello World Tutorials +You've completed all required configuration steps to perform inference on your Intel® Neural Compute Stick 2. +Proceed to the Get Started to get started with running code samples and demo applications. + +## Get Started -Visit the Intel Distribution of OpenVINO Toolkit [Inference Tutorials for Face Detection and Car Detection Exercises](https://github.com/intel-iot-devkit/inference-tutorials-generic/tree/openvino_toolkit_r3_0) +Now you are ready to get started. To continue, see the following pages: +* [OpenVINO™ Toolkit Overview](../index.md) +* [Get Started Guide for Windows](../get_started/get_started_macos.md) to learn the basic OpenVINO™ toolkit workflow and run code samples and demo applications with pre-trained models on different inference devices. 
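Before moving on to the Get Started material, it can help to confirm that the environment initializes and that the expected devices are visible. A minimal sketch, assuming the default installation path referenced in this guide and the Python API bundled with the release (the MYRIAD entry only appears after the Intel® Neural Compute Stick 2 steps above are completed):

```sh
# Initialize the OpenVINO environment in the current shell (default install path assumed)
source /opt/intel/openvino_2021/bin/setupvars.sh
# List the devices the Inference Engine can see; CPU should always be present,
# MYRIAD shows up once an Intel® Neural Compute Stick 2 is configured and plugged in
python3 -c "from openvino.inference_engine import IECore; print(IECore().available_devices)"
```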
## Uninstall the Intel® Distribution of OpenVINO™ Toolkit diff --git a/docs/install_guides/installing-openvino-windows.md b/docs/install_guides/installing-openvino-windows.md index af6c16247cb234..8de98761d15781 100644 --- a/docs/install_guides/installing-openvino-windows.md +++ b/docs/install_guides/installing-openvino-windows.md @@ -26,9 +26,7 @@ Your installation is complete when these are all completed: 4. Configure the Model Optimizer -5. Run two Verification Scripts to Verify Installation - -6. Optional:  +5. Optional:  - Install the Intel® Graphics Driver for Windows* @@ -36,7 +34,9 @@ Your installation is complete when these are all completed: - Update Windows* environment variables -7. Uninstall the Intel® Distribution of OpenVINO™ Toolkit +Also, the following steps will be covered in the guide: +- Get Started with Code Samples and Demo Applications +- Uninstall the Intel® Distribution of OpenVINO™ Toolkit ### About the Intel® Distribution of OpenVINO™ toolkit @@ -65,6 +65,19 @@ The following components are installed by default: | Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and other | | [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) | +**Could Be Optionally Installed** + +[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture +configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components: +* [Model Downloader](@ref omz_tools_downloader_README) +* [Intel® Open Model Zoo](@ref omz_models_intel_index) +* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +* [Post-training Optimization Tool](@ref pot_README) +* [Accuracy Checker](@ref omz_tools_accuracy_checker_README) +* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md) + +Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. + ### System Requirements **Hardware** @@ -99,29 +112,20 @@ The following components are installed by default: ### Install the Intel® Distribution of OpenVINO™ toolkit Core Components -1. If you have not downloaded the Intel® Distribution of OpenVINO™ toolkit, [download the latest version](http://software.intel.com/en-us/openvino-toolkit/choose-download/free-download-windows). By default, the file is saved to the `Downloads` directory as `w_openvino_toolkit_p_.exe`. - -2. Go to the `Downloads` folder and double-click `w_openvino_toolkit_p_.exe`. A window opens to let you choose your installation directory and components. The default installation directory is `C:\Program Files (x86)\Intel\openvino_`, for simplicity, a shortcut to the latest installation is also created: `C:\Program Files (x86)\Intel\openvino_2021`. If you choose a different installation directory, the installer will create the directory for you: - +1. 
If you have not downloaded the Intel® Distribution of OpenVINO™ toolkit, [download the latest version](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html). By default, the file is saved to the `Downloads` directory as `w_openvino_toolkit_p_.exe`. +2. Go to the `Downloads` folder and double-click `w_openvino_toolkit_p_.exe`. A window opens to let you choose your installation directory and components. ![](../img/openvino-install-windows-01.png) - + The default installation directory is `C:\Program Files (x86)\Intel\openvino_`, for simplicity, a shortcut to the latest installation is also created: `C:\Program Files (x86)\Intel\openvino_2021`. If you choose a different installation directory, the installer will create the directory for you. + > **NOTE**: If there is an OpenVINO™ toolkit version previously installed on your system, the installer will use the same destination directory for next installations. If you want to install a newer version to a different directory, you need to uninstall the previously installed versions. 3. Click **Next**. - 4. You are asked if you want to provide consent to gather information. Choose the option of your choice. Click **Next**. - 5. If you are missing external dependencies, you will see a warning screen. Write down the dependencies you are missing. **You need to take no other action at this time**. After installing the Intel® Distribution of OpenVINO™ toolkit core components, install the missing dependencies. The screen example below indicates you are missing two dependencies: - ![](../img/openvino-install-windows-02.png) - 6. Click **Next**. - 7. When the first part of installation is complete, the final screen informs you that the core components have been installed and additional steps still required: - - ![](../img/openvino-install-windows-03.png) - + ![](../img/openvino-install-windows-03.png) 8. Click **Finish** to close the installation wizard. A new browser window opens to the next section of the installation guide to set the environment variables. You are in the same document. The new window opens in case you ran the installation without first opening this installation guide. - 9. If the installation indicated you must install dependencies, install them first. If there are no missing dependencies, you can go ahead and set the environment variables. ### Set the Environment Variables @@ -139,14 +143,14 @@ setupvars.bat (Optional): OpenVINO toolkit environment variables are removed when you close the Command Prompt window. As an option, you can permanently set the environment variables manually. +> **NOTE**: If you see an error indicating Python is not installed when you know you installed it, your computer might not be able to find the program. For the instructions to add Python to your system environment variables, see Update Your Windows Environment Variables. + The environment variables are set. Continue to the next section to configure the Model Optimizer. ## Configure the Model Optimizer > **IMPORTANT**: These steps are required. You must configure the Model Optimizer for at least one framework. The Model Optimizer will fail if you do not complete the steps in this section. -> **NOTE**: If you see an error indicating Python is not installed when you know you installed it, your computer might not be able to find the program. For the instructions to add Python to your system environment variables, see Update Your Windows Environment Variables. 
- The Model Optimizer is a key component of the Intel® Distribution of OpenVINO™ toolkit. You cannot do inference on your trained model without running the model through the Model Optimizer. When you run a pre-trained model through the Model Optimizer, your output is an Intermediate Representation (IR) of the network. The IR is a pair of files that describe the whole model: - `.xml`: Describes the network topology @@ -234,89 +238,25 @@ The Model Optimizer is configured for one or more frameworks. Success is indicat ![](../img/Configure-MO.PNG) -You are ready to use two short demos to see the results of running the Intel Distribution of OpenVINO toolkit and to verify your installation was successful. The demo scripts are required since they perform additional configuration steps. Continue to the next section. - -If you want to use a GPU or VPU, or update your Windows* environment variables, read through the Optional Steps section. - - -## Use Verification Scripts to Verify Your Installation - -> **IMPORTANT**: This section is required. In addition to confirming your installation was successful, demo scripts perform other steps, such as setting up your computer to use the Inference Engine samples. - -> **NOTE**: -> The paths in this section assume you used the default installation directory. If you used a directory other than `C:\Program Files (x86)\Intel`, update the directory with the location where you installed the software. -To verify the installation and compile two samples, run the verification applications provided with the product on the CPU: - -1. Open a command prompt window. - -2. Go to the Inference Engine demo directory:
- ```sh - cd C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\ - ``` - -3. Run the verification scripts by following the instructions in the next section. - - -### Run the Image Classification Verification Script - -To run the script, start the `demo_squeezenet_download_convert_run.bat` file: -```sh -demo_squeezenet_download_convert_run.bat -``` - -This script downloads a SqueezeNet model, uses the Model Optimizer to convert the model to the `.‍bin` and `.‍xml` Intermediate Representation (IR) files. The Inference Engine requires this model conversion so it can use the IR as input and achieve optimum performance on Intel hardware.
-This verification script builds the [Image Classification Sample Async](../../inference-engine/samples/classification_sample_async/README.md) application and run it with the `car.png` image in the demo directory. For a brief description of the Intermediate Representation, see Configuring the Model Optimizer. - -When the verification script completes, you will have the label and confidence for the top-10 categories: -![](../img/image_classification_script_output_win.png) - -This demo is complete. Leave the console open and continue to the next section to run the Inference Pipeline demo. - - -### Run the Inference Pipeline Verification Script - -To run the script, start the `demo_security_barrier_camera.bat` file while still in the console: -```sh -demo_security_barrier_camera.bat -``` - -This script downloads three pre-trained model IRs, builds the [Security Barrier Camera Demo](@ref omz_demos_security_barrier_camera_demo_README) application, and runs it with the downloaded models and the `car_1.bmp` image from the `demo` directory to show an inference pipeline. The verification script uses vehicle recognition in which vehicle attributes build on each other to narrow in on a specific attribute. - -First, an object is identified as a vehicle. This identification is used as input to the next model, which identifies specific vehicle attributes, including the license plate. Finally, the attributes identified as the license plate are used as input to the third model, which recognizes specific characters in the license plate. - -When the demo completes, you have two windows open: - - * A console window that displays information about the tasks performed by the demo - * An image viewer window that displays a resulting frame with detections rendered as bounding boxes, similar to the following: - - ![](../img/inference_pipeline_script_win.png) - -Close the image viewer window to end the demo. - -To learn more about the verification scripts, see `README.txt` in `C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo`. - -For detailed description of the OpenVINO™ pre-trained object detection and object recognition models, see the [Overview of OpenVINO™ toolkit Pre-Trained Models](@ref omz_models_intel_index) page. - -In this section, you saw a preview of the Intel® Distribution of OpenVINO™ toolkit capabilities. - -Congratulations. You have completed all the required installation, configuration, and build steps to work with your trained models using CPU. +You have completed all required installation, configuration and build steps in this guide to use your CPU to work with your trained models. -If you want to use Intel® Processor graphics (GPU), Intel® Neural Compute Stick 2 or Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, or add CMake* and Python* to your Windows* environment variables, read through the next section for additional steps. +If you want to use a GPU or VPU, or update your Windows* environment variables, read through the Optional Steps section: -If you want to continue and run the Image Classification Sample Application on one of the supported hardware device, see the [Run the Image Classification Sample Application](#run-the-image-classification-sample-application) section. +- Steps for Intel® Processor Graphics (GPU) +- Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs +- Add CMake* or Python* to your Windows* environment variables
+Or proceed to the Get Started to get started with running code samples and demo applications. ## Optional Steps -Use the optional steps below if you want to: -* Infer models on Intel® Processor Graphics -* Infer models on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -* Add CMake* or Python* to your Windows* environment variables. - ### Optional: Additional Installation Steps for Intel® Processor Graphics (GPU) > **NOTE**: These steps are required only if you want to use a GPU. -If your applications offload computation to Intel® Integrated Graphics, you must have the Intel Graphics Driver for Windows version 15.65 or higher. To see if you have this driver installed: +If your applications offload computation to **Intel® Integrated Graphics**, you must have the latest version of Intel Graphics Driver for Windows installed for your hardware. +[Download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers). + +To check if you have this driver installed: 1. Type **device manager** in your **Search Windows** box. The **Device Manager** opens. @@ -326,14 +266,13 @@ If your applications offload computation to Intel® Integrated Graphics, you mus 3. Right-click the adapter name and select **Properties**. -4. Click the **Driver** tab to see the driver version. Make sure the version number is 15.65 or higher. +4. Click the **Driver** tab to see the driver version. ![](../img/DeviceDriverVersion.PNG) -5. If your device driver version is lower than 15.65, [download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers). - -You are done updating your device driver and are ready to use your GPU. +> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Drivers & Software](https://downloadcenter.intel.com/download/29993/Intel-Iris-Xe-MAX-Dedicated-Graphics-Drivers?product=80939) page for driver downloads and installation instructions. +You are done updating your device driver and are ready to use your GPU. Proceed to the Get Started to get started with running code samples and demo applications. ### Optional: Additional Installation Steps for the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs @@ -354,22 +293,7 @@ See also: * After you've configurated your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see [Intel® Movidius™ VPUs Programming Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-programming-guide.md) to learn how to distribute a model across all 8 VPUs to maximize performance. -After configuration is done, you are ready to run the verification scripts with the HDDL Plugin for your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. - -1. Open a command prompt window. - -2. Go to the Inference Engine demo directory: - ```sh - cd C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\ - ``` -3. Run the Image Classification verification script. If you have access to the Internet through the proxy server only, please make sure that it is configured in your environment. - ```sh - demo_squeezenet_download_convert_run.bat -d HDDL - ``` -4. Run the Inference Pipeline verification script: - ```sh - demo_security_barrier_camera.bat -d HDDL - ``` +After configuration is done, you are ready to Get Started with running code samples and demo applications. 
### Optional: Update Your Windows Environment Variables @@ -396,55 +320,11 @@ Use these steps to update your Windows `PATH` if a command you execute returns a Your `PATH` environment variable is updated. -## Run the Image Classification Sample Application - -> **IMPORTANT**: This section requires that you have [Run the Verification Scripts to Verify Installation](#run-the-demos). This script builds the Image Classification sample application and downloads and converts the required Caffe* Squeezenet model to an IR. - -In this section you will run the Image Classification sample application, with the Caffe* Squeezenet1.1 model on three types of Intel® hardware: CPU, GPU and VPUs. - -Image Classification sample application binary file was automatically built and the FP16 model IR files are created when you [Ran the Image Classification Verification Script](#run-the-image-classification-verification-script). - -The Image Classification sample application binary file located in the `C:\Users\\Documents\Intel\OpenVINO\inference_engine_samples_build\intel64\Release\` directory. -The Caffe* Squeezenet model IR files (`.bin` and `.xml`) are located in the in the `C:\Users\\Documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\` directory. - -> **NOTE**: If you installed the Intel® Distribution of OpenVINO™ toolkit to the non-default installation directory, replace `C:\Program Files (x86)\Intel` with the directory where you installed the software. - -To run the sample application: - -1. Set up environment variables: -```sh -cd C:\Program Files (x86)\Intel\openvino_2021\bin\setupvars.bat -``` -2. Go to the samples build directory: -```sh -cd C:\Users\\Documents\Intel\OpenVINO\inference_engine_samples_build\intel64\Release -``` -3. Run the sample executable with specifying the `car.png` file from the `demo` directory as an input image, the IR of your FP16 model and a plugin for a hardware device to perform inference on. -> **NOTE**: Running the sample application on hardware other than CPU requires performing [additional hardware configuration steps](#optional-steps). - - - For CPU: - ```sh - classification_sample_async.exe -i "C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\car.png" -m "C:\Users\\Documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.xml" -d CPU - ``` - - - For GPU: - ```sh - classification_sample_async.exe -i "C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\car.png" -m "C:\Users\\Documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.xml" -d GPU - ``` - - - For VPU (Intel® Neural Compute Stick 2): - ```sh - classification_sample_async.exe -i "C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\car.png" -m "C:\Users\\Documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.xml" -d MYRIAD - ``` - - - For VPU (Intel® Vision Accelerator Design with Intel® Movidius™ VPUs): - ```sh - classification_sample_async.exe -i "C:\Program Files (x86)\Intel\openvino_2021\deployment_tools\demo\car.png" -m "C:\Users\\Documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.xml" -d HDDL - ``` - -For information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). +## Get Started -Congratulations, you have finished the installation of the Intel® Distribution of OpenVINO™ toolkit for Windows*. 
To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and other resources are provided below. +Now you are ready to get started. To continue, see the following pages: +* [OpenVINO™ Toolkit Overview](../index.md) +* [Get Started Guide for Windows](../get_started/get_started_windows.md) to learn the basic OpenVINO™ toolkit workflow and run code samples and demo applications with pre-trained models on different inference devices. ## Uninstall the Intel® Distribution of OpenVINO™ Toolkit Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system: @@ -469,14 +349,12 @@ To learn more about converting deep learning models, go to: ## Additional Resources - [Intel Distribution of OpenVINO Toolkit home page](https://software.intel.com/en-us/openvino-toolkit) -- [Intel Distribution of OpenVINO Toolkit documentation](https://software.intel.com/en-us/openvino-toolkit/documentation/featured) - [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) -- [Introduction to Inference Engine](inference_engine_intro.md) +- [Introduction to Inference Engine](../IE_DG/inference_engine_intro.md) - [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) - [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md) - [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index) -- Intel Distribution of OpenVINO Toolkit Hello World Activities, see the [Inference Tutorials for Face Detection and Car Detection Exercises](https://github.com/intel-iot-devkit/inference-tutorials-generic/tree/openvino_toolkit_r3_0) - [Intel® Neural Compute Stick 2 Get Started](https://software.intel.com/en-us/neural-compute-stick/get-started) diff --git a/docs/install_guides/installing-openvino-yum.md b/docs/install_guides/installing-openvino-yum.md index 2dab2bcdf938ab..5fc6143ae5133d 100644 --- a/docs/install_guides/installing-openvino-yum.md +++ b/docs/install_guides/installing-openvino-yum.md @@ -106,6 +106,5 @@ sudo yum autoremove intel-openvino-runtime-centos-. - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). -- For information on Inference Engine Tutorials, see the [Inference Tutorials](https://github.com/intel-iot-devkit/inference-tutorials-generic). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). diff --git a/docs/ops/convolution/Convolution_1.md b/docs/ops/convolution/Convolution_1.md index e6c4ece350bc8a..ffdbbc508618a4 100644 --- a/docs/ops/convolution/Convolution_1.md +++ b/docs/ops/convolution/Convolution_1.md @@ -1,41 +1,41 @@ -## Convolution {#openvino_docs_ops_convolution_Convolution_1} +## Convolution {#openvino_docs_ops_convolution_Convolution_1} **Versioned name**: *Convolution-1* **Category**: Convolution -**Short description**: [Reference](http://caffe.berkeleyvision.org/tutorial/layers/convolution.html) +**Short description**: Computes 1D, 2D or 3D convolution (cross-correlation to be precise) of input and kernel tensors. 
-**Detailed description**: [Reference](http://cs231n.github.io/convolutional-networks/#conv) +**Detailed description**: Basic building block of convolution is a dot product of input patch and kernel. Whole operation consist of multiple such computations over multiple input patches and kernels. More thorough explanation can be found in [Convolutional Neural Networks](http://cs231n.github.io/convolutional-networks/#conv) and [Convolution operation](https://medium.com/apache-mxnet/convolutions-explained-with-ms-excel-465d6649831c). - -* For the convolutional layer, the number of output features in each dimension is calculated using the formula: +For the convolutional layer, the number of output features in each dimension is calculated using the formula: \f[ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 -\f] -* The receptive field in each layer is calculated using the formulas: - * Jump in the output feature map: - \f[ - j_{out} = j_{in} * s - \f] - * Size of the receptive field of output feature: - \f[ - r_{out} = r_{in} + ( k - 1 ) * j_{in} - \f] - * Center position of the receptive field of the first output feature: - \f[ - start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in} - \f] - * Output is calculated using the following formula: - \f[ - out = \sum_{i = 0}^{n}w_{i}x_{i} + b - \f] - -**Attributes** +\f] + +The receptive field in each layer is calculated using the formulas: +* Jump in the output feature map: + \f[ + j_{out} = j_{in} * s + \f] +* Size of the receptive field of output feature: + \f[ + r_{out} = r_{in} + ( k - 1 ) * j_{in} + \f] +* Center position of the receptive field of the first output feature: + \f[ + start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in} + \f] +* Output is calculated using the following formula: + \f[ + out = \sum_{i = 0}^{n}w_{i}x_{i} + b + \f] + +**Attributes**: * *strides* - * **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the (z, y, x) axes for 3D convolutions and (y, x) axes for 2D convolutions. For example, *strides* equal *4,2,1* means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension. + * **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(z, y, x)` axes for 3D convolutions and `(y, x)` axes for 2D convolutions. For example, *strides* equal `4,2,1` means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension. * **Range of values**: integer values starting from 0 * **Type**: int[] * **Default value**: None @@ -43,7 +43,7 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 * *pads_begin* - * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal *1,2* means adding 1 pixel to the top of the input and 2 to the left of the input. + * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input. * **Range of values**: integer values starting from 0 * **Type**: int[] * **Default value**: None @@ -52,7 +52,7 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 * *pads_end* - * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal *1,2* means adding 1 pixel to the bottom of the input and 2 to the right of the input. 
+ * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input. * **Range of values**: integer values starting from 0 * **Type**: int[] * **Default value**: None @@ -61,7 +61,7 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 * *dilations* - * **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal *1,1* means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal *2,2* means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1. + * **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1. * **Range of values**: integer value starting from 0 * **Type**: int[] * **Default value**: None @@ -70,24 +70,63 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 * *auto_pad* * **Description**: *auto_pad* how the padding is calculated. Possible values: - * *explicit*: use explicit padding values from `pads_begin` and `pads_end`. - * *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end (at the beginning). + * *explicit* - use explicit padding values from *pads_begin* and *pads_end*. + * *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end. + * *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning. * *valid* - do not use padding. * **Type**: string - * **Default value**: None + * **Default value**: explicit * **Required**: *no* * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified. **Inputs**: -* **1**: Input tensor of rank 3 or greater. Required. -* **2**: Convolution kernel tensor. Weights layout is OIYX (OIZYX for 3D convolution), which means that *X* is changing the fastest, then *Y*, then *Input*, then *Output*. The size of the kernel is derived from the shape of this input and not specified by any attribute. Required. +* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is NCZYX (number of batches, number of channels, spatial axes Z, Y, X). Required. +* **2**: Kernel tensor of type *T* and rank 3, 4 or 5. Layout is OIZYX (number of output channels, number of input channels, spatial axes Z, Y, X). Required. +* **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute: + * 1D convolution (input tensors rank 3) means that there is only one spatial axis X + * 2D convolution (input tensors rank 4) means that there are two spatial axes Y, X + * 3D convolution (input tensors rank 5) means that there are three spatial axes Z, Y, X + +**Outputs**: + +* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is NOZYX (number of batches, number of kernel output channels, spatial axes Z, Y, X). 
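+
+As a quick illustration of the output size formula above (the sizes below are chosen only for this example): a kernel of size 4 applied with stride 2 and no padding to an input with 128 elements along a spatial axis yields
+\f[
+n_{out} = \left ( \frac{128 + 2 \cdot 0 - 4}{2} \right ) + 1 = 63
+\f]
+elements along that axis.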
+ +**Types**: + +* *T*: any floating point type. -**Example** +**Example**: +1D Convolution ```xml - + + + + 1 + 5 + 128 + + + 16 + 5 + 4 + + + + + 1 + 16 + 63 + + + +``` +2D Convolution +```xml + + 1 @@ -112,3 +151,35 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 ``` + +3D Convolution +```xml + + + + + 1 + 7 + 320 + 320 + 320 + + + 32 + 7 + 3 + 3 + 3 + + + + + 1 + 32 + 106 + 106 + 106 + + + +``` \ No newline at end of file diff --git a/docs/ops/convolution/GroupConvolution_1.md b/docs/ops/convolution/GroupConvolution_1.md index 3cd78c99c7ad35..33a34c6fa2ed4b 100644 --- a/docs/ops/convolution/GroupConvolution_1.md +++ b/docs/ops/convolution/GroupConvolution_1.md @@ -4,15 +4,16 @@ **Category**: Convolution -**Short description**: [Reference](http://caffe.berkeleyvision.org/tutorial/layers/convolution.html) +**Short description**: Computes 1D, 2D or 3D GroupConvolution of input and kernel tensors. -**Detailed description**: [Reference](http://cs231n.github.io/convolutional-networks/#conv) +**Detailed description**: Splits input into multiple groups, convolves them with group filters as in regular convolution and concatenates the results. More thorough explanation can be found in [ImageNet Classification with Deep Convolutional +Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf) -**Attributes** +**Attributes**: The operation has the same attributes as a regular _Convolution_. Number of groups is derived from the kernel shape. * *strides* - * **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the (z, y, x) axes for 3D convolutions and (y, x) axes for 2D convolutions. For example, *strides* equal *4,2,1* means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension. + * **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(z, y, x)` axes for 3D convolutions and `(y, x)` axes for 2D convolutions. For example, *strides* equal `4,2,1` means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension. * **Range of values**: positive integer numbers * **Type**: int[] * **Default value**: None @@ -20,7 +21,7 @@ * *pads_begin* - * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal *1,2* means adding 1 pixel to the top of the input and 2 to the left of the input. + * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input. * **Range of values**: positive integer numbers * **Type**: int[] * **Default value**: None @@ -29,7 +30,7 @@ * *pads_end* - * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal *1,2* means adding 1 pixel to the bottom of the input and 2 to the right of the input. + * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input. * **Range of values**: positive integer numbers * **Type**: int[] * **Default value**: None @@ -38,7 +39,7 @@ * *dilations* - * **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. 
For example, *dilation* equal *1,1* means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal *2,2* means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1. + * **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1. * **Range of values**: positive integer numbers * **Type**: int[] * **Default value**: None @@ -47,50 +48,64 @@ * *auto_pad* * **Description**: *auto_pad* how the padding is calculated. Possible values: - * *explicit*: use explicit padding values from `pads_begin` and `pads_end`. - * *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end (at the beginning). + * *explicit* - use explicit padding values from *pads_begin* and *pads_end*. + * *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end. + * *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning. * *valid* - do not use padding. * **Type**: string - * **Default value**: None + * **Default value**: explicit * **Required**: *no* * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified. **Inputs**: -* **1**: 4D or 5D input tensor. Required. - -* **2**: Convolution kernel tensor. Weights layout is GOIYX (GOIZYX for 3D convolution), which means that *X* is changing the fastest, then *Y*, then *Input*, *Output* and *Group*. The size of kernel and number of groups are derived from the shape of this input and aren't specified by any attribute. Required. - - -**Mathematical Formulation** - -* For the convolutional layer, the number of output features in each dimension is calculated using the formula: -\f[ -n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 -\f] -* The receptive field in each layer is calculated using the formulas: - * Jump in the output feature map: - \f[ - j_{out} = j_{in} * s - \f] - * Size of the receptive field of output feature: - \f[ - r_{out} = r_{in} + ( k - 1 ) * j_{in} - \f] - * Center position of the receptive field of the first output feature: - \f[ - start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in} - \f] - * Output is calculated using the following formula: - \f[ - out = \sum_{i = 0}^{n}w_{i}x_{i} + b - \f] - -**Example** +* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is NCZYX (number of batches, number of channels, spatial axes Z, Y, X). Required. +* **2**: Convolution kernel tensor of type *T* and rank 4, 5 or 6. Layout is GOIZYX (number of groups, number of output channels, number of input channels, spatial axes Z, Y, X), + * **Note** Number of groups is derived from the shape of the kernel and not specified by any attribute. 
+ * **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute: + * 1D convolution (input tensors rank 3) means that there is only one spatial axis X + * 2D convolution (input tensors rank 4) means that there are two spatial axes Y, X + * 3D convolution (input tensors rank 5) means that there are three spatial axes Z, Y, X + +**Outputs**: + +* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is NOZYX (number of batches, number of kernel output channels, spatial axes Z, Y, X). + +**Types**: + +* *T*: any floating point type. + +**Example**: +1D GroupConvolution +```xml + + + + + 1 + 12 + 224 + + + 4 + 1 + 3 + 5 + + + + + 1 + 4 + 224 + + +``` +2D GroupConvolution ```xml - + 1 @@ -115,3 +130,35 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1 ``` + +3D GroupConvolution +```xml + + + + + 1 + 12 + 224 + 224 + 224 + + + 4 + 1 + 3 + 5 + 5 + 5 + + + + + 1 + 4 + 224 + 224 + 224 + + +``` \ No newline at end of file diff --git a/docs/ops/detection/DeformablePSROIPooling_1.md b/docs/ops/detection/DeformablePSROIPooling_1.md index 2adcfb82e2e5c2..43abd2d65f6596 100644 --- a/docs/ops/detection/DeformablePSROIPooling_1.md +++ b/docs/ops/detection/DeformablePSROIPooling_1.md @@ -90,7 +90,7 @@ The box coordinates are specified as five element tuples: *[batch_id, x_1, y_1, ```xml - + 1 diff --git a/docs/ops/detection/DetectionOutput_1.md b/docs/ops/detection/DetectionOutput_1.md index 6175a9668982bc..363ef6ae4ea57c 100644 --- a/docs/ops/detection/DetectionOutput_1.md +++ b/docs/ops/detection/DetectionOutput_1.md @@ -156,7 +156,7 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the ```xml - + 1 diff --git a/docs/ops/detection/PriorBoxClustered_1.md b/docs/ops/detection/PriorBoxClustered_1.md index 5d798442a92b6b..6762fe5fe97e68 100644 --- a/docs/ops/detection/PriorBoxClustered_1.md +++ b/docs/ops/detection/PriorBoxClustered_1.md @@ -110,7 +110,7 @@ If *clip* is defined, the coordinates of prior boxes are recalculated with the f ```xml - + 2 diff --git a/docs/ops/detection/PriorBox_1.md b/docs/ops/detection/PriorBox_1.md index 99842bc05caac1..4a4aa35186bda3 100644 --- a/docs/ops/detection/PriorBox_1.md +++ b/docs/ops/detection/PriorBox_1.md @@ -160,7 +160,7 @@ ```xml - + 2 diff --git a/docs/ops/image/Interpolate_4.md b/docs/ops/image/Interpolate_4.md index 72bb435227381f..a2f78226307524 100644 --- a/docs/ops/image/Interpolate_4.md +++ b/docs/ops/image/Interpolate_4.md @@ -80,7 +80,7 @@ * *cube_coeff* -* **Description**: *cube_coeff* specifies the parameter *a* for cubic interpolation (see, e.g. [article](https://ieeexplore.ieee.org/document/1163711/)). *cube_coeff* is used only when `mode == cubic`. + * **Description**: *cube_coeff* specifies the parameter *a* for cubic interpolation (see, e.g. [article](https://ieeexplore.ieee.org/document/1163711/)). *cube_coeff* is used only when `mode == cubic`. * **Range of values**: floating point number * **Type**: any of supported floating point type * **Default value**: `-0.75` diff --git a/docs/ops/pooling/AvgPool_1.md b/docs/ops/pooling/AvgPool_1.md index b19cbca9ab7fdb..78792d77d11836 100644 --- a/docs/ops/pooling/AvgPool_1.md +++ b/docs/ops/pooling/AvgPool_1.md @@ -48,9 +48,9 @@ * **Default value**: None * **Required**: *yes* -* *exclude_pad* +* *exclude-pad* - * **Description**: *exclude_pad* is a type of pooling strategy for values in the padding area. 
For example, if *exclude_pad* is "true", then zero-values that came from padding are not included in averaging calculation. + * **Description**: *exclude-pad* is a type of pooling strategy for values in the padding area. For example, if *exclude-pad* is "true", then zero-values that came from padding are not included in averaging calculation. * **Range of values**: true or false * **Type**: boolean * **Default value**: None @@ -94,7 +94,7 @@ output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n} ```xml - + 1 @@ -114,7 +114,7 @@ output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n} - + 1 @@ -134,7 +134,7 @@ output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n} - + 1 @@ -154,7 +154,7 @@ output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n} - + 1 @@ -174,7 +174,7 @@ output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n} - + 1 diff --git a/docs/ops/sequence/CTCGreedyDecoder_1.md b/docs/ops/sequence/CTCGreedyDecoder_1.md index 59c6bf267eea7b..7a56c7e57ad683 100644 --- a/docs/ops/sequence/CTCGreedyDecoder_1.md +++ b/docs/ops/sequence/CTCGreedyDecoder_1.md @@ -19,9 +19,9 @@ Sequences in the batch can have different length. The lengths of sequences are c **Attributes** -* *merge_repeated* +* *ctc_merge_repeated* - * **Description**: *merge_repeated* is a flag for merging repeated labels during the CTC calculation. + * **Description**: *ctc_merge_repeated* is a flag for merging repeated labels during the CTC calculation. * **Range of values**: true or false * **Type**: `boolean` * **Default value**: true @@ -41,6 +41,7 @@ Sequences in the batch can have different length. The lengths of sequences are c ```xml + 20 diff --git a/docs/ops/sequence/CTCLoss_4.md b/docs/ops/sequence/CTCLoss_4.md index 67def3a2250366..c38e0a293d23dc 100644 --- a/docs/ops/sequence/CTCLoss_4.md +++ b/docs/ops/sequence/CTCLoss_4.md @@ -41,7 +41,7 @@ p(S) = \prod_{t=1}^{L_i} p_{i,t,ct} 3. Finally, compute negative log of summed up probabilities of all found alignments: \f[ -CTCLoss = \minus \ln \sum_{S} p(S) +CTCLoss = - \ln \sum_{S} p(S) \f] **Note 1**: This calculation scheme does not provide steps for optimal implementation and primarily serves for better explanation. diff --git a/docs/ovsa/ovsa_diagram.png b/docs/ovsa/ovsa_diagram.png new file mode 100644 index 00000000000000..774de121e18d0b --- /dev/null +++ b/docs/ovsa/ovsa_diagram.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7ed21b111f0438b9fad367c4db293c35882de05bc8bb3252a1ef5bc289ae2a +size 33369 diff --git a/docs/ovsa/ovsa_example.png b/docs/ovsa/ovsa_example.png new file mode 100644 index 00000000000000..fd44a7a4ff8b7b --- /dev/null +++ b/docs/ovsa/ovsa_example.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356688e3fd7dd4ad6c591cda1d35d9ebd5c2b6f9787e6caa4c116717101669e5 +size 29847 diff --git a/docs/ovsa/ovsa_get_started.md b/docs/ovsa/ovsa_get_started.md new file mode 100644 index 00000000000000..f45d4bf299cff8 --- /dev/null +++ b/docs/ovsa/ovsa_get_started.md @@ -0,0 +1,798 @@ +# OpenVINO™ Security Add-on {#ovsa_get_started} + +This guide provides instructions for people who use the OpenVINO™ Security Add-on to create, distribute, and use models that are created with the OpenVINO™ toolkit: + +* **Model Developer**: The Model Developer interacts with the Independent Software Vendor to control the User access to models. This document shows you how to setup hardware and virtual machines to use the OpenVINO™ Security Add-on to define access control to your OpenVINO™ models and then provide the access controlled models to the users. 
+* **Independent Software Vendor**: Use this guide for instructions to use the OpenVINO™ Security Add-on to validate license for access controlled models that are provided to your customers (users). +* **User**: This document includes instructions for end users who need to access and run access controlled models through the OpenVINO™ Security Add-on. + +In this release, one person performs the role of both the Model Developer and the Independent Software Vendor. Therefore, this document provides instructions to configure one system for these two roles and one system for the User role. This document also provides a way for the same person to play the role of the Model Developer, Independent Software Vendor, and User to let you see how the OpenVINO™ Security Add-on functions from the User perspective. + + +## Overview + +The OpenVINO™ Security Add-on works with the [OpenVINO™ Model Server](@ref openvino_docs_ovms) on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. + +The OpenVINO™ Security Add-on consists of three components that run in Kernel-based Virtual Machines (KVMs). These components provide a way to run security-sensitive operations in an isolated environment. A brief description of the three components are as follows. Click each triangled line for more information about each. + +
+ OpenVINO™ Security Add-on Tool: As a Model Developer or Independent Software Vendor, you use the OpenVINO™ Security Add-on Tool(`ovsatool`) to generate a access controlled model and master license. + +- The Model Developer generates a access controlled model from the OpenVINO™ toolkit output. The access controlled model uses the model's Intermediate Representation (IR) files to create a access controlled output file archive that are distributed to Model Users. The Developer can also put the archive file in long-term storage or back it up without additional security. + +- The Model Developer uses the OpenVINO™ Security Add-on Tool(`ovsatool`) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer to get external Certificate Authorities to generate certificates to add to a key-store. + +- The Model Developer generates user-specific licenses in a JSON format file for the access controlled model. The Model Developer can define global or user-specific licenses and attach licensing policies to the licenses. For example, the Model Developer can add a time limit for a model or limit the number of times a user can run a model. + +
+ +
+ OpenVINO™ Security Add-on License Service: Use the OpenVINO™ Security Add-on License Service to verify user parameters. + +- The Independent Software Vendor hosts the OpenVINO™ Security Add-on License Service, which responds to license validation requests when a user attempts to load a access controlled model in a model server. The licenses are registered with the OpenVINO™ Security Add-on License Service. + +- When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool(`ovsatool`). The user must be able to reach the Independent Software Vendor's License Service over the Internet. + +
+ +
+ OpenVINO™ Security Add-on Runtime: Users install and use the OpenVINO™ Security Add-on Runtime on a virtual machine. + +Users host the OpenVINO™ Security Add-on Runtime component in a virtual machine. + +Externally from the OpenVINO™ Security Add-on, the User adds the access controlled model to the OpenVINO™ Model Server config file. The OpenVINO™ Model Server attempts to load the model in memory. At this time, the OpenVINO™ Security Add-on Runtime component validates the user's license for the access controlled model against information stored in the License Service provided by the Independent Software Vendor. + +After the license is successfully validated, the OpenVINO™ Model Server loads the model and services the inference requests. + +
+ +
+**Where the OpenVINO™ Security Add-on Fits into Model Development and Deployment** + +![Security Add-on Diagram](ovsa_diagram.png) + +## About the Installation +The Model Developer, Independent Software Vendor, and User each must prepare one physical hardware machine and one Kernel-based Virtual Machine (KVM). In addition, each person must prepare a Guest Virtual Machine (Guest VM) for each role that person plays. + +For example: +* If one person acts as both the Model Developer and as the Independent Software Vendor, that person must prepare two Guest VMs. Both Guest VMs can be on the same physical hardware (Host Machine) and under the same KVM on that Host Machine. +* If one person acts as all three roles, that person must prepare three Guest VMs. All three Guest VMs can be on the same Host Machine and under the same KVM on that Host Machine. + +**Purpose of Each Machine** + +| Machine | Purpose | +| ----------- | ----------- | +| Host Machine | Physical hardware on which the KVM and Guest VM share set up. | +| Kernel-based Virtual Machine (KVM) | The OpenVINO™ Security Add-on runs in this virtual machine because it provides an isolated environment for security sensitive operations. | +| Guest VM | The Model Developer uses the Guest VM to enable access control to the completed model.
The Independent Software Vendor uses the Guest VM to host the License Service.
The User uses the Guest VM to contact the License Service and run the access controlled model. | + + +## Prerequisites + +**Hardware** +* Intel® Core™ or Xeon® processor
+ +**Operating system, firmware, and software** +* Ubuntu* Linux* 18.04 on the Host Machine.
+* TPM version 2.0-conformant Discrete Trusted Platform Module (dTPM) or Firmware Trusted Platform Module (fTPM) +* Secure boot is enabled.
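+
+As a quick check of these firmware prerequisites on the Host Machine (a minimal sketch; it assumes the `mokutil` utility is installed), you can query the Secure Boot state and look for the TPM device nodes:
+```sh
+# Prints "SecureBoot enabled" when Secure Boot is active
+mokutil --sb-state
+# Lists /dev/tpm0 and /dev/tpmrm0 when the kernel exposes a TPM device
+ls /dev/tpm*
+```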
+ +**Other** +* The Independent Software Vendor must have access to a Certificate Authority (CA) that implements the Online Certificate Status Protocol (OCSP), supporting Elliptic Curve Cryptography (ECC) certificates for deployment. +* The example in this document uses self-signed certificates. + +## How to Prepare a Host Machine + +This section is for the combined role of Model Developer and Independent Software Vendor, and the separate User role. + +### Step 1: Set up Packages on the Host Machine + +Begin this step on the Intel® Core™ or Xeon® processor machine that meets the prerequisites. + +> **NOTE**: As an alternative to manually following steps 1 - 11, you can run the script `install_host_deps.sh` in the `Scripts/reference directory` under the OpenVINO™ Security Add-on repository. The script stops with an error message if it identifies any issues. If the script halts due to an error, correct the issue that caused the error and restart the script. The script runs for several minutes and provides progress information. + +1. Test for Trusted Platform Module (TPM) support: + ```sh + dmesg | grep -i TPM + ``` + The output indicates TPM availability in the kernel boot logs. Look for presence of the following devices to indicate TPM support is available: + * `/dev/tpm0` + * `/dev/tpmrm0` + + If you do not see this information, your system does not meet the prerequisites to use the OpenVINO™ Security Add-on. +2. Make sure hardware virtualization support is enabled in the BIOS: + ```sh + kvm-ok + ``` + The output should show:
+ `INFO: /dev/kvm exists`
+ `KVM acceleration can be used` + + If your output is different, modify your BIOS settings to enable hardware virtualization. + + If the `kvm-ok` command is not present, install it: + ```sh + sudo apt install -y cpu-checker + ``` +3. Install the Kernel-based Virtual Machine (KVM) and QEMU packages. + ```sh + sudo apt install qemu qemu-kvm libvirt-bin bridge-utils virt-manager + ``` +4. Check the QEMU version: + ```sh + qemu-system-x86_64 --version + ``` + If the response indicates a QEMU version lower than 2.12.0 download, compile and install the latest QEMU version from [https://www.qemu.org/download](https://www.qemu.org/download). +5. Build and install the [`libtpm` package](https://github.com/stefanberger/libtpms/). +6. Build and install the [`swtpm` package](https://github.com/stefanberger/swtpm/). +7. Add the `swtpm` package to the `$PATH` environment variable. +8. Install the software tool [`tpm2-tss`]( https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz).
+    Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md
+9. Install the software tool [`tpm2-abrmd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz).
+ Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md +10. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz).
+ Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md +11. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/). + > **NOTE**: Regardless of whether you used the `install_host_deps.sh` script, complete step 12 to finish setting up the packages on the Host Machine. +12. If you are running behind a proxy, [set up a proxy for Docker](https://docs.docker.com/config/daemon/systemd/). + +The following are installed and ready to use: +* Kernel-based Virtual Machine (KVM) +* QEMU +* SW-TPM +* HW-TPM support +* Docker
+ +You're ready to configure the Host Machine for networking. + +### Step 2: Set up Networking on the Host Machine + +This step is for the combined Model Developer and Independent Software Vendor roles. If Model User VM is running on different physical host, repeat the following steps for that host also. + +In this step you prepare two network bridges: +* A global IP address that a KVM can access across the Internet. This is the address that the OpenVINO™ Security Add-on Run-time software on a user's machine uses to verify they have a valid license. +* A host-only local address to provide communication between the Guest VM and the QEMU host operating system. + +This example in this step uses the following names. Your configuration might use different names: +* `50-cloud-init.yaml` as an example configuration file name. +* `eno1` as an example network interface name. +* `br0` as an example bridge name. +* `virbr0` as an example bridge name. + +1. Open the network configuration file for editing. This file is in `/etc/netplan` with a name like `50-cloud-init.yaml` +2. Look for these lines in the file: + ```sh + network: + ethernets: + eno1: + dhcp4: true + dhcp-identifier: mac + version: 2 + ``` +3. Change the existing lines and add the `br0` network bridge. These changes enable external network access: + ```sh + network: + ethernets: + eno1: + dhcp4: false + bridges: + br0: + interfaces: [eno1] + dhcp4: yes + dhcp-identifier: mac + version: 2 + ``` +4. Save and close the network configuration file. +5. Run two commands to activate the updated network configuration file. If you use ssh, you might lose network connectivity when issuing these commands. If so, reconnect to the network. +```sh +sudo netplan generate +``` +```sh +sudo netplan apply +``` + A bridge is created and an IP address is assigned to the new bridge. +6. Verify the new bridge: + ```sh + ip a | grep br0 + ``` + The output looks similar to this and shows valid IP addresses: + ```sh + 4: br0:
mtu 1500 qdisc noqueue state UP group default qlen 1000
inet 123.123.123.123/ brd 321.321.321.321 scope global dynamic br0 + ``` +7. Create a script named `br0-qemu-ifup` to bring up the `br0` interface. Add the following script contents: + ```sh + #!/bin/sh + nic=$1 + if [ -f /etc/default/qemu-kvm ]; then + . /etc/default/qemu-kvm + fi + switch=br0 + ifconfig $nic 0.0.0.0 up + brctl addif ${switch} $nic + ``` +8. Create a script named `br0-qemu-ifdown` to bring down the `br0` interface. Add the following script contents: + ```sh + #!/bin/sh + nic=$1 + if [ -f /etc/default/qemu-kvm ]; then + . /etc/default/qemu-kvm + fi + switch=br0 + brctl delif $switch $nic + ifconfig $nic 0.0.0.0 down + ``` +9. Create a script named `virbr0-qemu-ifup` to bring up the `virbr0` interface. Add the following script contents: + ```sh + #!/bin/sh + nic=$1 + if [ -f /etc/default/qemu-kvm ]; then + . /etc/default/qemu-kvm + fi + switch=virbr0 + ifconfig $nic 0.0.0.0 up + brctl addif ${switch} $nic + ``` +10. Create a script named `virbr0-qemu-ifdown` to bring down the `virbr0` interface. Add the following script contents: + ```sh + #!/bin/sh + nic=$1 + if [ -f /etc/default/qemu-kvm ]; then + . /etc/default/qemu-kvm + fi + switch=virbr0 + brctl delif $switch $nic + ifconfig $nic 0.0.0.0 down + ``` + +See the QEMU documentation for more information about the QEMU network configuration. + +Networking is set up on the Host Machine. Continue to the Step 3 to prepare a Guest VM for the combined role of Model Developer and Independent Software Vendor. + + +### Step 3: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor + +For each separate role you play, you must prepare a virtual machine, called a Guest VM. Because in this release, the Model Developer and Independent Software Vendor roles are combined, these instructions guide you to set up one Guest VM, named `ovsa_isv`. + +Begin these steps on the Host Machine. + +As an option, you can use `virsh` and the virtual machine manager to create and bring up a Guest VM. See the `libvirtd` documentation for instructions if you'd like to do this. + +1. Download the [Ubuntu 18.04 server ISO image](https://releases.ubuntu.com/18.04/ubuntu-18.04.5-live-server-amd64.iso) + +2. Create an empty virtual disk image to serve as the Guest VM for your role as Model Developer and Independent Software Vendor: + ```sh + sudo qemu-img create -f qcow2 /ovsa_isv_dev_vm_disk.qcow2 20G + ``` +3. Install Ubuntu 18.04 on the Guest VM. Name the Guest VM `ovsa_isv`: + ```sh + sudo qemu-system-x86_64 -m 8192 -enable-kvm \ + -cpu host \ + -drive if=virtio,file=/ovsa_isv_dev_vm_disk.qcow2,cache=none \ + -cdrom /ubuntu-18.04.5-live-server-amd64.iso \ + -device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \ + -netdev tap,id=hostnet1,script=/virbr0-qemu-ifup,downscript=/virbr0-qemu-ifdown \ + -vnc :1 + ``` +4. Connect a VNC client with `:1` +5. Follow the prompts on the screen to finish installing the Guest VM. Name the VM as `ovsa_isv_dev` +6. Shut down the Guest VM. +7. Restart the Guest VM after removing the option of cdrom image: + ```sh + sudo qemu-system-x86_64 -m 8192 -enable-kvm \ + -cpu host \ + -drive if=virtio,file=/ovsa_isv_dev_vm_disk.qcow2,cache=none \ + -device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \ + -netdev tap,id=hostnet1,script=/virbr0-qemu-ifup,downscript=/virbr0-qemu-ifdown \ + -vnc :1 + ``` +8. Choose ONE of these options to install additional required software: + * **Option 1**: Use a script to install additional software + 1. 
Copy the script `install_guest_deps.sh` from the `Scripts/reference` directory of the OVSA repository to the Guest VM.
+	2. Run the script.
+	3. Shut down the Guest VM.
+ * **Option 2** : Manually install additional software + 1. Install the software tool [`tpm2-tss`](https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz). + Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md + 2. Install the software tool [`tpm2-abmrd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz). + Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md + 3. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz). + Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md + 4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/) + 5. Shut down the Guest VM.
+9. On the host, create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory: + ```sh + sudo mkdir -p /var/OVSA/ + sudo mkdir /var/OVSA/vtpm + sudo mkdir /var/OVSA/vtpm/vtpm_isv_dev + ``` + **NOTE**: For steps 10 and 11, you can copy and edit the script named `start_ovsa_isv_dev_vm.sh` in the `Scripts/reference` directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. If using the script, select the script with `isv` in the file name regardless of whether you are playing the role of the Model Developer or the role of the Independent Software Vendor. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM. +10. Start the vTPM on Host: + ```sh + swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_isv_dev \ + --tpm2 \ + --ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_isv_dev/swtpm-sock \ + --log level=20 + ``` + +11. Start the Guest VM: + ```sh + sudo qemu-system-x86_64 \ + -cpu host \ + -enable-kvm \ + -m 8192 \ + -smp 8,sockets=1,cores=8,threads=1 \ + -device e1000,netdev=hostnet0,mac=52:54:00:d1:66:6f \ + -netdev tap,id=hostnet0,script=/br0-qemu-ifup,downscript=/br0-qemu-ifdown \ + -device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \ + -netdev tap,id=hostnet1,script=/virbr0-qemu-ifup,downscript=/virbr0-qemu-ifdown \ + -drive if=virtio,file=/ovsa_isv_dev_vm_disk.qcow2,cache=none \ + -chardev socket,id=chrtpm,path=/var/OVSA/vtpm/vtpm_isv_dev/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 \ + -vnc :1 + ``` + Use the QEMU runtime options in the command to change the memory amount or CPU assigned to this Guest VM. + +12. Use a VNC client to log on to the Guest VM at `:1` + +### Step 4: Set Up one Guest VM for the User role + +1. Choose ONE of these options to create a Guest VM for the User role:
+ **Option 1: Copy and Rename the `ovsa_isv_dev_vm_disk.qcow2` disk image** + 1. Copy the `ovsa_isv_dev_vm_disk.qcow2` disk image to a new image named `ovsa_runtime_vm_disk.qcow2`. You created the `ovsa_isv_dev_vm_disk.qcow2` disk image in Step 3. + 2. Boot the new image. + 3. Change the hostname from `ovsa_isv_dev` to `ovsa_runtime`. + ```sh + sudo hostnamectl set-hostname ovsa_runtime + ``` + 4. Replace all instances of `ovsa_isv_dev` to `ovsa_runtime` in the new image. + ```sh + sudo nano /etc/hosts + ``` + 5. Change the `/etc/machine-id`: + ```sh + sudo rm /etc/machine-id + systemd-machine-id-setup + ``` + 6. Shut down the Guest VM.

+ + **Option 2: Manually create the Guest VM** + 1. Create an empty virtual disk image: + ```sh + sudo qemu-img create -f qcow2 /ovsa_ovsa_runtime_vm_disk.qcow2 20G + ``` + 2. Install Ubuntu 18.04 on the Guest VM. Name the Guest VM `ovsa_runtime`: + ```sh + sudo qemu-system-x86_64 -m 8192 -enable-kvm \ + -cpu host \ + -drive if=virtio,file=/ovsa_ovsa_runtime_vm_disk.qcow2,cache=none \ + -cdrom /ubuntu-18.04.5-live-server-amd64.iso \ + -device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \ + -netdev tap,id=hostnet1,script=/virbr0-qemu-ifup, downscript=/virbr0-qemu-ifdown \ + -vnc :2 + ``` + 3. Connect a VNC client with `:2`. + 4. Follow the prompts on the screen to finish installing the Guest VM. Name the Guest VM `ovsa_runtime`. + 5. Shut down the Guest VM. + 6. Restart the Guest VM: + ```sh + sudo qemu-system-x86_64 -m 8192 -enable-kvm \ + -cpu host \ + -drive if=virtio,file=/ovsa_ovsa_runtime_vm_disk.qcow2,cache=none \ + -device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \ + -netdev tap,id=hostnet1,script=/virbr0-qemu-ifup, downscript=/virbr0-qemu-ifdown \ + -vnc :2 + ``` + 7. Choose ONE of these options to install additional required software: + + **Option 1: Use a script to install additional software** + 1. Copy the script `install_guest_deps.sh` from the `Scripts/reference` directory of the OVSA repository to the Guest VM + 2. Run the script. + 3. Shut down the Guest VM.

+ + **Option 2: Manually install additional software** + 1. Install the software tool [`tpm2-tss`](https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz)
+ Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md

+	2. Install the software tool [`tpm2-abrmd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz)
+ Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md

+ 3. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz)
+ Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md

+ 4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/) + 5. Shut down the Guest VM.

+ +2. Create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory: + ```sh + sudo mkdir /var/OVSA/vtpm/vtpm_runtime + ``` + **NOTE**: For steps 3 and 4, you can copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM. This means that if you are creating a third Guest VM on the same Host Machine, change `-vnc :2` to `-vnc :3` +3. Start the vTPM: + ```sh + swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_runtime \ + --tpm2 \ + --ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_runtime/swtpm-sock \ + --log level=20 + ``` +4. Start the Guest VM in a new terminal. To do so, either copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository or manually run the command: + ```sh + sudo qemu-system-x86_64 \ + -cpu host \ + -enable-kvm \ + -m 8192 \ + -smp 8,sockets=1,cores=8,threads=1 \ + -device e1000,netdev=hostnet2,mac=52:54:00:d1:67:6f \ + -netdev tap,id=hostnet2,script=/br0-qemu-ifup,downscript=/br0-qemu-ifdown \ + -device e1000,netdev=hostnet3,mac=52:54:00:d1:67:5f \ + -netdev tap,id=hostnet3,script=/virbr0-qemu-ifup,downscript=/virbr0-qemu-ifdown \ + -drive if=virtio,file=/ovsa_runtime_vm_disk.qcow2,cache=none \ + -chardev socket,id=chrtpm,path=/var/OVSA/vtpm/vtpm_runtime/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 \ + -vnc :2 + ``` + Use the QEMU runtime options in the command to change the memory amount or CPU assigned to this Guest VM. +5. Use a VNC client to log on to the Guest VM at `:` where `` corresponds to the vnc number in the `start_ovsa_isv_vm.sh` or in step 8. + +## How to Build and Install the OpenVINO™ Security Add-on Software + +Follow the below steps to build and Install OpenVINO™ Security Add-on on host and different VMs. + +### Step 1: Build the OpenVINO™ Model Server image +Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker containers. Download and build OpenVINO™ Model Server first on the host. + +1. Download the [OpenVINO™ Model Server software](https://github.com/openvinotoolkit/model_server) +2. Build the [OpenVINO™ Model Server Docker images](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md) + ```sh + git clone https://github.com/openvinotoolkit/model_server.git + cd model_server + make docker_build + ``` +### Step 2: Build the software required for all roles + +This step is for the combined role of Model Developer and Independent Software Vendor, and the User + +1. Download the [OpenVINO™ Security Add-on](https://github.com/openvinotoolkit/security_addon) + +2. Go to the top-level OpenVINO™ Security Add-on source directory. + ```sh + cd security_addon + ``` +3. Build the OpenVINO™ Security Add-on: + ```sh + make clean all + sudo make package + ``` + The following packages are created under the `release_files` directory: + - `ovsa-kvm-host.tar.gz`: Host Machine file + - `ovsa-developer.tar.gz`: For the Model Developer and the Independent Software Developer + - `ovsa-model-hosting.tar.gz`: For the User + +### Step 3: Install the host software +This step is for the combined role of Model Developer and Independent Software Vendor, and the User. + +1. Go to the `release_files` directory: + ```sh + cd release_files + ``` +2. 
Set up the path: + ```sh + export OVSA_RELEASE_PATH=$PWD + ``` +3. Install the OpenVINO™ Security Add-on Software on the Host Machine: + ```sh + cd $OVSA_RELEASE_PATH + tar xvfz ovsa-kvm-host.tar.gz + cd ovsa-kvm-host + ./install.sh + ``` + +If you are using more than one Host Machine, repeat Step 3 on each. + +### Step 4: Set up packages on the Guest VM +This step is for the combined role of Model Developer and Independent Software Vendor. References to the Guest VM are to `ovsa_isv_dev`. + +1. Log on to the Guest VM. +2. Create the OpenVINO™ Security Add-on directory in the home directory: + ```sh + mkdir OVSA + ``` +3. Go to the Host Machine, outside of the Guest VM. +4. Copy `ovsa-developer.tar.gz` from `release_files` to the Guest VM: + ```sh + cd $OVSA_RELEASE_PATH + scp ovsa-developer.tar.gz username@://OVSA + ``` +5. Go to the Guest VM. +6. Install the software on the Guest VM: + ```sh + cd OVSA + tar xvfz ovsa-developer.tar.gz + cd ovsa-developer + sudo -s + ./install.sh + ``` +7. Create a directory named `artefacts`. This directory will hold the artefacts required to create licenses: + ```sh + cd //OVSA + mkdir artefacts + cd artefacts + ``` +8. Start the license server in a separate terminal: + ```sh + sudo -s + source /opt/ovsa/scripts/setupvars.sh + cd /opt/ovsa/bin + ./license_server + ``` + +### Step 5: Install the OpenVINO™ Security Add-on Model Hosting Component + +This step is for the User. References to the Guest VM are to `ovsa_runtime`. + +The Model Hosting components install the OpenVINO™ Security Add-on Runtime Docker container, based on the OpenVINO™ Model Server NGINX Docker image, to host an access controlled model. + +1. Log on to the Guest VM as ``. +2. Create the OpenVINO™ Security Add-on directory in the home directory: + ```sh + mkdir OVSA + ``` +3. While on the Host Machine, copy `ovsa-model-hosting.tar.gz` from `release_files` to the Guest VM: + ```sh + cd $OVSA_RELEASE_PATH + scp ovsa-model-hosting.tar.gz username@://OVSA + ``` +4. Install the software on the Guest VM: + ```sh + cd OVSA + tar xvfz ovsa-model-hosting.tar.gz + cd ovsa-model-hosting + sudo -s + ./install.sh + ``` +5. Create a directory named `artefacts`: + ```sh + cd //OVSA + mkdir artefacts + cd artefacts + ``` + +## How to Use the OpenVINO™ Security Add-on + +This section requires interactions between the Model Developer/Independent Software Vendor and the User. All roles must complete all applicable setup and installation steps before beginning this section. + +This document uses the [face-detection-retail-0004](@ref omz_models_intel_face_detection_retail_0004_description_face_detection_retail_0004) model as an example. + +The following figure describes the interactions between the Model Developer, Independent Software Vendor, and User. + +**Remember**: The Model Developer/Independent Software Vendor and User roles are related to virtual machine use, and one person might fill the tasks required by multiple roles. In this document the tasks of Model Developer and Independent Software Vendor are combined and use the Guest VM named `ovsa_isv`. It is possible to have all roles set up on the same Host Machine. + +![OpenVINO™ Security Add-on Example Diagram](ovsa_example.png) + +### Model Developer Instructions + +The Model Developer creates a model, defines access control for it, and creates the user license. References to the Guest VM are to `ovsa_isv_dev`. 
After the model is created, access control for it is enabled, and the license is ready, the Model Developer provides the license details to the Independent Software Vendor before sharing them with the Model User. + +#### Step 1: Create a key store and add a certificate to it + +1. Set up a path to the artefacts directory: + ```sh + sudo -s + cd //OVSA/artefacts + export OVSA_DEV_ARTEFACTS=$PWD + source /opt/ovsa/scripts/setupvars.sh + ``` +2. Create files to request a certificate:
+ This example uses a self-signed certificate for demonstration purposes. In a production environment, use CSR files to request a CA-signed certificate. + ```sh + cd $OVSA_DEV_ARTEFACTS + /opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k isv_keystore -r isv_keystore.csr -e "/C=IN/CN=localhost" + ``` + Two files are created: + - `isv_keystore.csr` - A Certificate Signing Request (CSR) + - `isv_keystore.csr.crt` - A self-signed certificate + + In a production environment, send `isv_keystore.csr` to a CA to request a CA-signed certificate. + +3. Add the certificate to the key store: + ```sh + /opt/ovsa/bin/ovsatool keygen -storecert -c isv_keystore.csr.crt -k isv_keystore + ``` + +#### Step 2: Create the model + +This example uses `curl` to download the `face-detection-retail-0004` model from the OpenVINO Model Zoo. If you are behind a firewall, check and set your proxy settings. + +1. Log on to the Guest VM. + +2. Download a model from the Model Zoo: + ```sh + cd $OVSA_DEV_ARTEFACTS + curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin + ``` + The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory. + +#### Step 3: Define access control for the model and create a master license for it + +1. Go to the `artefacts` directory: + ```sh + cd $OVSA_DEV_ARTEFACTS + ``` +2. Run the `uuidgen` command: + ```sh + uuidgen + ``` +3. Define and enable the model access control and master license: + ```sh + /opt/ovsa/bin/ovsatool protect -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g + ``` +The Intermediate Representation files for the `face-detection-retail-0004` model are encrypted as `face_detection_model.dat` and a master license is generated as `face_detection_model.masterlic`. + +#### Step 4: Create a Runtime Reference TCB + +Use the runtime reference TCB to create a customer license for the access controlled model and the specific runtime. + +Generate the reference TCB for the runtime: +```sh +cd $OVSA_DEV_ARTEFACTS +source /opt/ovsa/scripts/setupvars.sh + /opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f face_detect_runtime_vm.tcb -k isv_keystore +``` + +#### Step 5: Publish the access controlled Model and Runtime Reference TCB +The access controlled model is ready to be shared with the User and the reference TCB is ready to perform license checks. + +#### Step 6: Receive a User Request +1. Obtain artefacts from the User who needs access to an access controlled model: + * Customer certificate from the customer's key store. + * Any other information that applies to your licensing practices, such as the length of time the user needs access to the model. + +2. Create a customer license configuration: + ```sh + cd $OVSA_DEV_ARTEFACTS + /opt/ovsa/bin/ovsatool licgen -t TimeLimit -l30 -n "Time Limit License Config" -v 1.0 -u ":" -k isv_keystore -o 30daylicense.config + ``` +3. 
Create the customer license: + ```sh + cd $OVSA_DEV_ARTEFACTS + /opt/ovsa/bin/ovsatool sale -m face_detection_model.masterlic -k isv_keystore -l 30daylicense.config -t face_detect_runtime_vm.tcb -p custkeystore.csr.crt -c face_detection_model.lic + ``` + +4. Update the license server database with the license: + ```sh + cd /opt/ovsa/DB + python3 ovsa_store_customer_lic_cert_db.py ovsa.db $OVSA_DEV_ARTEFACTS/face_detection_model.lic $OVSA_DEV_ARTEFACTS/custkeystore.csr.crt + ``` + +5. Provide these files to the User: + * `face_detection_model.dat` + * `face_detection_model.lic` + +### User Instructions +References to the Guest VM are to `ovsa_runtime`. + +#### Step 1: Add a CA-Signed Certificate to a Key Store + +1. Set up a path to the artefacts directory: + ```sh + sudo -s + cd //OVSA/artefacts + export OVSA_RUNTIME_ARTEFACTS=$PWD + source /opt/ovsa/scripts/setupvars.sh + ``` +2. Generate a Customer key store file: + ```sh + cd $OVSA_RUNTIME_ARTEFACTS + /opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k custkeystore -r custkeystore.csr -e "/C=IN/CN=localhost" + ``` + Two files are created: + * `custkeystore.csr` - A Certificate Signing Request (CSR) + * `custkeystore.csr.crt` - A self-signed certificate + +3. Send `custkeystore.csr` to the CA to request a CA-signed certificate. + +4. Add the certificate to the key store: + ```sh + /opt/ovsa/bin/ovsatool keygen -storecert -c custkeystore.csr.crt -k custkeystore + ``` + +#### Step 2: Request an access controlled Model from the Model Developer +This example uses `scp` to share data between the `ovsa_runtime` and `ovsa_isv_dev` Guest VMs on the same Host Machine. + +1. Communicate your need for a model to the Model Developer. The Developer will ask you to provide the certificate from your key store and other information. This example uses the length of time the model needs to be available. +2. Generate an artefact file to provide to the Developer: + ```sh + cd $OVSA_RUNTIME_ARTEFACTS + scp custkeystore.csr.crt username@://OVSA/artefacts + ``` +#### Step 3: Receive and load the access controlled model into the OpenVINO™ Model Server +1. Receive the model as files named: + * `face_detection_model.dat` + * `face_detection_model.lic` +2. Prepare the environment: + ```sh + cd $OVSA_RUNTIME_ARTEFACTS/.. + cp /opt/ovsa/example_runtime ovms -r + cd ovms + mkdir -vp model/fd/1 + ``` + The `$OVSA_RUNTIME_ARTEFACTS/../ovms` directory contains scripts and a sample configuration JSON file to start the model server. +3. Copy the artefacts from the Model Developer: + ```sh + cd $OVSA_RUNTIME_ARTEFACTS/../ovms + cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.dat model/fd/1/. + cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.lic model/fd/1/. + cp $OVSA_RUNTIME_ARTEFACTS/custkeystore model/fd/1/. + ``` +4. Rename and edit `sample.json` to include the names of the access controlled model artefacts you received from the Model Developer. The file looks like this: + ```json + { + "custom_loader_config_list":[ + { + "config":{ + "loader_name":"ovsa", + "library_path": "/ovsa-runtime/lib/libovsaruntime.so" + } + } + ], + "model_config_list":[ + { + "config":{ + "name":"protected-model", + "base_path":"/sampleloader/model/fd", + "custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "protected_file": "face_detection_model"} + } + } + ] + } + ``` +#### Step 4: Start the NGINX Model Server +The NGINX Model Server publishes the access controlled model. 
+ ```sh + ./start_secure_ovsa_model_server.sh + ``` +For information about the NGINX interface, see https://github.com/openvinotoolkit/model_server/blob/main/extras/nginx-mtls-auth/README.md + +#### Step 5: Prepare to run Inference + +1. Log on to the Guest VM from another terminal. + +2. Install the Python dependencies for your setup. For example: + ```sh + sudo apt install python3-pip + pip3 install cmake + pip3 install scikit-build + pip3 install opencv-python + pip3 install futures==3.1.1 + pip3 install tensorflow-serving-api==1.14.0 + ``` +3. Copy `face_detection.py` from the example client directory `/opt/ovsa/example_client`: + ```sh + cd /home/intel/OVSA/ovms + cp /opt/ovsa/example_client/* . + ``` +4. Copy the sample images for inference. An `images` directory is created that includes a sample image for inference: + ```sh + curl --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/master/example_client/images/people/people1.jpeg -o images/people1.jpeg + ``` +#### Step 6: Run Inference + +Run the `face_detection.py` script: +```sh +python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert server.pem --client_cert client.pem --client_key client.key --model_name protected-model +``` +An optional pre-flight check for Steps 3 to 6 is sketched after the Summary below. + +## Summary +You have completed these tasks: +- Set up one or more computers (Host Machines) with one KVM per machine and one or more virtual machines (Guest VMs) on the Host Machines +- Installed the OpenVINO™ Security Add-on +- Used the OpenVINO™ Model Server to work with OpenVINO™ Security Add-on +- As a Model Developer or Independent Software Vendor, you access controlled a model and prepared a license for it. +- As a Model Developer or Independent Software Vendor, you prepared and ran a License Server and used the License Server to verify that a User had a valid license to use an access controlled model. +- As a User, you provided information to a Model Developer or Independent Software Vendor to get an access controlled model and the license for the model. +- As a User, you set up and launched a Host Server on which you can run licensed and access controlled models. +- As a User, you loaded an access controlled model, validated the license for the model, and used the model to run inference. 
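The following is an optional pre-flight check for Steps 3 to 6 of the User instructions. It is a sketch only: the `ovsa` container-name filter is an assumption, and the file layout matches the defaults created in Step 3 above.
```sh
# Run from the ovms directory created in Step 3.
# Confirm the protected model artefacts are where the custom loader expects them.
ls -l model/fd/1/face_detection_model.dat model/fd/1/face_detection_model.lic model/fd/1/custkeystore
# Confirm the OVSA/NGINX model server container is running (the name may differ on your system).
sudo docker ps --filter "name=ovsa" --format "{{.Names}}: {{.Status}}"
```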
+ +## References +Use these links for more information: +- [OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit) +- [OpenVINO Model Server Quick Start Guide](https://github.com/openvinotoolkit/model_server/blob/main/docs/ovms_quickstart.md) +- [Model repository](https://github.com/openvinotoolkit/model_server/blob/main/docs/models_repository.md) diff --git a/docs/template_plugin/src/template_async_infer_request.cpp b/docs/template_plugin/src/template_async_infer_request.cpp index 3facaf7327d5f6..41c1f62724f6b5 100644 --- a/docs/template_plugin/src/template_async_infer_request.cpp +++ b/docs/template_plugin/src/template_async_infer_request.cpp @@ -25,16 +25,19 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest( if (remoteDevice) { _pipeline = { {cpuTaskExecutor, [this] { - IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline) + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, + "TemplateAsyncInferRequest::PreprocessingAndStartPipeline"); _inferRequest->inferPreprocess(); _inferRequest->startPipeline(); }}, {_waitExecutor, [this] { - IE_PROFILING_AUTO_SCOPE(WaitPipeline) + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, + "TemplateAsyncInferRequest::WaitPipeline"); _inferRequest->waitPipeline(); }}, {cpuTaskExecutor, [this] { - IE_PROFILING_AUTO_SCOPE(Postprocessing) + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, + "TemplateAsyncInferRequest::Postprocessing"); _inferRequest->inferPostprocess(); }} }; diff --git a/docs/template_plugin/src/template_executable_network.cpp b/docs/template_plugin/src/template_executable_network.cpp index 7eff8da41484be..0a2193342d8af5 100644 --- a/docs/template_plugin/src/template_executable_network.cpp +++ b/docs/template_plugin/src/template_executable_network.cpp @@ -10,6 +10,7 @@ #include "template/template_config.hpp" #include "template_plugin.hpp" #include "template_executable_network.hpp" +#include "template_itt.hpp" using namespace TemplatePlugin; @@ -61,7 +62,7 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model, model.read(dataBlob->buffer(), dataSize); } - // TODO: implement Import / Export of configuration options + // TODO: implement Import / Export of configuration options and merge with `cfg` // TODO: implement Import / Export of network precisions, layouts, preprocessing info auto cnnnetwork = _plugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob)); @@ -142,7 +143,7 @@ InferenceEngine::IInferRequest::Ptr TemplatePlugin::ExecutableNetwork::CreateInf auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs); auto asyncThreadSafeImpl = std::make_shared(std::static_pointer_cast(internalRequest), _taskExecutor, _plugin->_waitExecutor, _callbackExecutor); - asyncRequest.reset(new InferenceEngine::InferRequestBase(asyncThreadSafeImpl), + asyncRequest.reset(new InferenceEngine::InferRequestBase(asyncThreadSafeImpl), [](InferenceEngine::IInferRequest *p) { p->Release(); }); asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest); return asyncRequest; @@ -188,6 +189,8 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const st // ! 
[executable_network:export_impl] void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::ExportImpl"); + // Note: custom ngraph extensions are not supported std::map custom_opsets; std::stringstream xmlFile, binFile; diff --git a/docs/template_plugin/src/template_infer_request.cpp b/docs/template_plugin/src/template_infer_request.cpp index 30c9a4c0f9fa95..61f4cb15120b7b 100644 --- a/docs/template_plugin/src/template_infer_request.cpp +++ b/docs/template_plugin/src/template_infer_request.cpp @@ -244,7 +244,8 @@ void TemplateInferRequest::inferPostprocess() { // ! [infer_request:infer_postprocess] // ! [infer_request:get_performance_counts] -void TemplateInferRequest::GetPerformanceCounts(std::map &perfMap) const { +std::map TemplateInferRequest::GetPerformanceCounts() const { + std::map perfMap; InferenceEngineProfileInfo info; info.execution_index = 0; info.status = InferenceEngineProfileInfo::EXECUTED; @@ -259,5 +260,6 @@ void TemplateInferRequest::GetPerformanceCounts(std::map& perfMap) const override; + std::map GetPerformanceCounts() const override; InferenceEngine::StatusCode Cancel() override; diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp index 6b9610f722f00e..ff339499645cb0 100644 --- a/docs/template_plugin/src/template_plugin.cpp +++ b/docs/template_plugin/src/template_plugin.cpp @@ -16,6 +16,7 @@ #include #include "template/template_config.hpp" +#include "template_itt.hpp" #include "template_plugin.hpp" #include "template_executable_network.hpp" #include "template_infer_request.hpp" @@ -74,6 +75,8 @@ std::shared_ptr TransformNetwork(const std::shared_ptr& config) { - // TODO: Import network from stream is not mandatory functionality; - // Can just throw an exception and remove the code below - Configuration exportedCfg; - - // some code below which reads exportedCfg from `model` stream - // .. + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetworkImpl"); - auto cfg = Configuration(config, exportedCfg); - auto exec_network_impl = std::make_shared(model, cfg, std::static_pointer_cast(shared_from_this())); + Configuration cfg(config); + auto exec_network_impl = std::make_shared(model, cfg, + std::static_pointer_cast(shared_from_this())); return make_executable_network(exec_network_impl); } @@ -129,6 +128,8 @@ InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model // ! 
[plugin:query_network] InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork &network, const ConfigMap& config) const { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::QueryNetwork"); + InferenceEngine::QueryNetworkResult res; Configuration cfg{config, _cfg, false}; diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index 81e01c6de25bf2..ae69fcc016a54b 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -136,7 +136,6 @@ endif () ## TBB package if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") - reset_deps_cache(TBBROOT TBB_DIR) if (WIN32 AND X86_64) @@ -235,22 +234,23 @@ if (ENABLE_OPENCV) elseif (ARM) set(OPENCV_SUFFIX "debian9arm") set(OPENCV_HASH "0e787d6738092993bc92bb55975f52caabae45dc73473b5196d15e65e87d6b9d") - elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") + elseif ((LINUX_OS_NAME STREQUAL "CentOS 7" OR + CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") AND X86_64) set(OPENCV_SUFFIX "centos7") set(OPENCV_HASH "9b813af064d463b31fa1603b11b6559532a031d59bb0782d234380955fd397e0") - elseif (LINUX_OS_NAME MATCHES "CentOS 8") + elseif (LINUX_OS_NAME MATCHES "CentOS 8" AND X86_64) set(OPENCV_SUFFIX "centos8") set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83") - elseif (LINUX_OS_NAME STREQUAL "Ubuntu 16.04") + elseif (LINUX_OS_NAME STREQUAL "Ubuntu 16.04" AND X86_64) set(OPENCV_SUFFIX "ubuntu16") set(OPENCV_HASH "cd46831b4d8d1c0891d8d22ff5b2670d0a465a8a8285243059659a50ceeae2c3") - elseif (LINUX_OS_NAME STREQUAL "Ubuntu 18.04") + elseif (LINUX_OS_NAME STREQUAL "Ubuntu 18.04" AND X86_64) set(OPENCV_SUFFIX "ubuntu18") set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83") - elseif (LINUX_OS_NAME STREQUAL "Ubuntu 20.04") + elseif (LINUX_OS_NAME STREQUAL "Ubuntu 20.04" AND X86_64) set(OPENCV_SUFFIX "ubuntu20") set(OPENCV_HASH "2b7808d002864acdc5fc0b19cd30dadc31a37cc267931cad605f23f2383bfc21") - else() + elseif(NOT DEFINED OpenCV_DIR AND NOT DEFINED ENV{OpenCV_DIR}) message(FATAL_ERROR "OpenCV is not available on current platform (${LINUX_OS_NAME})") endif() RESOLVE_DEPENDENCY(OPENCV diff --git a/inference-engine/cmake/features.cmake b/inference-engine/cmake/features.cmake index 8c83eaf24536e7..56aeba29020835 100644 --- a/inference-engine/cmake/features.cmake +++ b/inference-engine/cmake/features.cmake @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # -#these options are aimed to optimize build time on development system +# these options are aimed to optimize build time on development system -ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE;NOT ANDROID;X86 OR X86_64" OFF) +ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE;NOT ANDROID;X86_64" OFF) ie_dependent_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF "ENABLE_CLDNN" OFF) @@ -26,7 +26,7 @@ if (ENABLE_MKL_DNN) endif() # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ" -if(ARM OR (MSVC AND (ARM OR AARCH64)) ) +if(X86 OR ARM OR (MSVC AND (ARM OR AARCH64)) ) set(THREADING_DEFAULT "SEQ") else() set(THREADING_DEFAULT "TBB") diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake index 3c5fd59cb9589f..3ffce54ee1ffd8 100644 --- a/inference-engine/cmake/vpu_dependencies.cmake +++ b/inference-engine/cmake/vpu_dependencies.cmake @@ 
-1,4 +1,4 @@ -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # @@ -6,14 +6,14 @@ include_guard(GLOBAL) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES_HASH - "39a35758b76463f633f377616057c7d2a24562c7c1cfc36744f28949619e57c9" - "798df21b5b3a8c4a6faab61f9220b2b216ba6c4a5acf75aaa17a8520bc639bfe") + "7892e82f8ba90b487c4b115bfc266265d8ceb6f3cfc3e7e203ec6150d041fa2c" + "bec36fa7a8b64cd50df8b7782c594df32267c5081d7aa2e77a701dcfa18b3ec6") # # Default packages # -set(FIRMWARE_PACKAGE_VERSION 1579) +set(FIRMWARE_PACKAGE_VERSION 1599) set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") # diff --git a/inference-engine/ie_bridges/c/samples/hello_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_classification/README.md index 845a19e1bf52dc..6bf0ddf0b6369b 100644 --- a/inference-engine/ie_bridges/c/samples/hello_classification/README.md +++ b/inference-engine/ie_bridges/c/samples/hello_classification/README.md @@ -14,7 +14,7 @@ To properly demonstrate this API, it is required to run several networks in pipe ## Running -To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md index eeadef10cdbf88..a9e1e20056b049 100644 --- a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md +++ b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md @@ -34,9 +34,7 @@ ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv ## Running -To run the sample, you can use public or pre-trained models. To download pre-trained models, use -the OpenVINO™ [Model Downloader](@ref omz_tools_downloader_README) -or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the > Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). 
diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md index 2e70a23f0576a6..55916a129f9473 100644 --- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md @@ -39,7 +39,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp b/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp index 561ede3b3c76fc..b8c1f61adfcc9d 100644 --- a/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp +++ b/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp @@ -235,6 +235,7 @@ TEST(ie_core_get_metric, getMetric) { const char *device_name = "CPU"; const char *metric_name = "SUPPORTED_CONFIG_KEYS"; ie_param_t param; + param.params = nullptr; IE_EXPECT_OK(ie_core_get_metric(core, device_name, metric_name, ¶m)); ie_param_free(¶m); @@ -249,6 +250,7 @@ TEST(ie_core_get_config, getConfig) { const char *device_name = "CPU"; const char *config_name = "CPU_THREADS_NUM"; ie_param_t param; + param.params = nullptr; IE_EXPECT_OK(ie_core_get_config(core, device_name, config_name, ¶m)); EXPECT_STREQ(param.params, "0"); @@ -847,6 +849,7 @@ TEST(ie_exec_network_get_config, getConfig) { EXPECT_NE(nullptr, exe_network); ie_param_t param; + param.params = nullptr; IE_EXPECT_OK(ie_exec_network_get_config(exe_network, "CPU_THREADS_NUM", ¶m)); ie_param_free(¶m); @@ -901,6 +904,7 @@ TEST(ie_exec_network_get_metric, getMetric) { EXPECT_NE(nullptr, exe_network); ie_param_t param; + param.params = nullptr; IE_EXPECT_OK(ie_exec_network_get_metric(exe_network, "SUPPORTED_CONFIG_KEYS", ¶m)); ie_param_free(¶m); @@ -1735,11 +1739,16 @@ TEST(ie_blob_make_memory_nv12, inferRequestWithNV12Blob) { ie_blob_t *output_blob = nullptr; IE_EXPECT_OK(ie_infer_request_get_blob(infer_request, "fc_out", &output_blob)); + EXPECT_NE(nullptr, output_blob); ie_blob_buffer_t buffer; + buffer.buffer = nullptr; IE_EXPECT_OK(ie_blob_get_buffer(output_blob, &buffer)); - float *output_data = (float *)(buffer.buffer); - EXPECT_NEAR(output_data[1], 0.f, 1.e-5); + EXPECT_NE(buffer.buffer, nullptr); + if (buffer.buffer) { + float *output_data = (float *)(buffer.buffer); + EXPECT_NEAR(output_data[1], 0.f, 1.e-5); + } ie_blob_free(&output_blob); ie_blob_free(&blob_nv12); diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md index d7a20f5037333d..80dc537b9a5702 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md +++ 
b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md @@ -59,7 +59,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download the pre-trained models with the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or from [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/ie_bridges/python/sample/hello_classification/README.md b/inference-engine/ie_bridges/python/sample/hello_classification/README.md index 488278c87d2ff4..34858bb437a1bb 100644 --- a/inference-engine/ie_bridges/python/sample/hello_classification/README.md +++ b/inference-engine/ie_bridges/python/sample/hello_classification/README.md @@ -46,7 +46,7 @@ Options: Running the application with the empty list of options yields the usage message given above. -To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download the pre-trained models with the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or from [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md index 75b05f78c5f5df..bdba6c38ab46e3 100644 --- a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md @@ -13,7 +13,7 @@ When the inference is done, the application outputs inference results to the sta > **NOTE**: This sample supports models with FP32 weights only. -The `lenet.bin` weights file was generated by the [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +The `lenet.bin` weights file was generated by the [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) tool from the public LeNet model with the `--input_shape [64,1,28,28]` parameter specified. The original model is available in the [Caffe* repository](https://github.com/BVLC/caffe/tree/master/examples/mnist) on GitHub\*. 
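For reference, a Model Optimizer invocation along the following lines could be used to regenerate such an IR from the public Caffe LeNet model. This is a sketch under assumptions: the `lenet.prototxt`/`lenet.caffemodel` file names and the output directory are illustrative and are not taken from the sample.
```sh
# Hypothetical conversion of the Caffe LeNet model to IR with a fixed batch of 64.
python3 mo.py \
    --input_model lenet.caffemodel \
    --input_proto lenet.prototxt \
    --input_shape [64,1,28,28] \
    --data_type FP32 \
    --output_dir lenet_ir
```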
@@ -69,4 +69,4 @@ By default, the application outputs top-1 inference result for each inference re ## See Also -* [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) +* [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md index 26b8394cdb7260..90bc09ff2e75bf 100644 --- a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md +++ b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md @@ -55,7 +55,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To run the sample, you can use RMNet_SSD or other object-detection models. You can download the pre-trained models with the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or from [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use RMNet_SSD or other object-detection models. You can download [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py index 0ff4523c931b0d..5bbd3a4609268b 100644 --- a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py +++ b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py @@ -73,13 +73,7 @@ def main(): # ----------------------------------------------------------------------------------------------------- # --------------------------- 3. Read and preprocess input -------------------------------------------- - - print("inputs number: " + str(len(net.input_info.keys()))) - assert len(net.input_info.keys()) == 1, 'Sample supports networks with one input' - for input_key in net.input_info: - print("input shape: " + str(net.input_info[input_key].input_data.shape)) - print("input key: " + input_key) if len(net.input_info[input_key].input_data.layout) == 4: n, c, h, w = net.input_info[input_key].input_data.shape @@ -96,7 +90,6 @@ def main(): image = cv2.resize(image, (w, h)) image = image.transpose((2, 0, 1)) # Change data layout from HWC to CHW images[i] = image - # ----------------------------------------------------------------------------------------------------- # --------------------------- 4. 
Configure input & output --------------------------------------------- @@ -122,23 +115,30 @@ def main(): data[input_name] = images if input_info_name != "": - infos = np.ndarray(shape=(n, c), dtype=float) + detection_size = net.input_info[input_info_name].input_data.shape[1] + infos = np.ndarray(shape=(n, detection_size), dtype=float) for i in range(n): infos[i, 0] = h infos[i, 1] = w - infos[i, 2] = 1.0 + for j in range(2, detection_size): + infos[i, j] = 1.0 data[input_info_name] = infos # --------------------------- Prepare output blobs ---------------------------------------------------- log.info('Preparing output blobs') + output_name, output_info = "", None func = ng.function_from_cnn(net) - ops = func.get_ordered_ops() - output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))] - output_ops = {op.friendly_name : op for op in ops \ - if op.friendly_name in net.outputs and op.get_type_name() == "DetectionOutput"} - if len(output_ops) != 0: - output_name, output_info = output_ops.popitem() + if func: + ops = func.get_ordered_ops() + for op in ops: + if op.friendly_name in net.outputs and op.get_type_name() == "DetectionOutput": + output_name = op.friendly_name + output_info = net.outputs[output_name] + break + else: + output_name = list(net.outputs.keys())[0] + output_info = net.outputs[output_name] if output_name == "": log.error("Can't find a DetectionOutput layer in the topology") @@ -189,12 +189,12 @@ def main(): else: print() + tmp_image = cv2.imread(args.input) for imid in classes: - tmp_image = cv2.imread(args.input) for box in boxes[imid]: cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]), (232, 35, 244), 2) - cv2.imwrite("out.bmp", tmp_image) - log.info("Image out.bmp created!") + cv2.imwrite("out.bmp", tmp_image) + log.info("Image out.bmp created!") # ----------------------------------------------------------------------------------------------------- log.info("Execution successful\n") diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/constants.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/constants.pyx index 188d38940bd422..ce5ca4d6dede81 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/constants.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/constants.pyx @@ -18,7 +18,7 @@ from .cimport ie_api_impl_defs as C import numpy as np from enum import Enum -supported_precisions = ["FP32", "FP64", "FP16", "I64", "U64", "I32", "U32", "I16", "I8", "U16", "U8"] +supported_precisions = ["FP32", "FP64", "FP16", "I64", "U64", "I32", "U32", "I16", "I8", "U16", "U8", "BOOL"] known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL', 'MULTI'] @@ -34,7 +34,8 @@ format_map = { 'U16' : np.uint16, 'I8' : np.int8, 'U8' : np.uint8, - 'I64' : np.int64 + 'I64' : np.int64, + 'BOOL' : np.uint8 } layout_str_to_enum = {'ANY': C.Layout.ANY, diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index e92dcaaa58e8ba..c92040d40cf76a 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -173,7 +173,8 @@ cdef class Blob: fp64_array_memview = self._array_data self._ptr = C.make_shared_blob[double](c_tensor_desc, &fp64_array_memview[0], fp64_array_memview.shape[0]) elif precision == "FP16": - raise RuntimeError("Currently, it's impossible 
to set_blob with FP16 precision") + I16_array_memview = self._array_data.view(dtype=np.int16) + self._ptr = C.make_shared_blob[int16_t](c_tensor_desc, &I16_array_memview[0], I16_array_memview.shape[0]) elif precision == "I16": I16_array_memview = self._array_data self._ptr = C.make_shared_blob[int16_t](c_tensor_desc, &I16_array_memview[0], I16_array_memview.shape[0]) @@ -1222,7 +1223,10 @@ cdef class InferRequest: def _fill_inputs(self, inputs): for k, v in inputs.items(): assert k in self._inputs_list, f"No input with name {k} found in network" - self.input_blobs[k].buffer[:] = v + if self.input_blobs[k].tensor_desc.precision == "FP16": + self.input_blobs[k].buffer[:] = v.view(dtype=np.int16) + else: + self.input_blobs[k].buffer[:] = v ## This class contains the information about the network model read from IR and allows you to manipulate with @@ -1439,6 +1443,14 @@ cdef class IENetwork: def _get_function_capsule(self): return self.impl.getFunction() + def get_ov_name_for_tensor(self, orig_name: str): + name = bytes(orig_name, 'utf-8') + return self.impl.getOVNameForTensor(name).decode('utf-8') + + def get_ov_name_for_operation(self, orig_name: str): + name = bytes(orig_name, 'utf-8') + return self.impl.getOVNameForOperation(name).decode('utf-8') + cdef class BlobBuffer: """Copy-less accessor for Inference Engine Blob""" diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 226cc73bc2ee42..7a2bd205a0837d 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -260,6 +260,14 @@ const std::map InferenceEnginePython::IE return outputs; } +std::string InferenceEnginePython::IENetwork::getOVNameForTensor(const std::string& orig_name) { + return actual->getOVNameForTensor(orig_name); +} + +std::string InferenceEnginePython::IENetwork::getOVNameForOperation(const std::string& orig_name) { + return actual->getOVNameForOperation(orig_name); +} + void InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id) { actual->addOutput(out_layer, port_id); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index 5534d1ddb53215..eff8c8cec3f504 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -71,6 +71,9 @@ struct IENetwork { IENetwork() = default; void convertToOldRepresentation(); + + std::string getOVNameForTensor(const std::string& orig_name); + std::string getOVNameForOperation(const std::string& orig_name); }; diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index d11d8b526a8743..91b3e9af849e90 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -175,6 +175,8 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except + object getFunction() except + void convertToOldRepresentation() 
except + + string getOVNameForTensor(const string &) except + + string getOVNameForOperation(const string &) except + cdef cppclass InferRequestWrap: double exec_time; diff --git a/inference-engine/ie_bridges/python/tests/test_Blob.py b/inference-engine/ie_bridges/python/tests/test_Blob.py index 7220f87cbd8adf..2353e60c61c293 100644 --- a/inference-engine/ie_bridges/python/tests/test_Blob.py +++ b/inference-engine/ie_bridges/python/tests/test_Blob.py @@ -36,87 +36,48 @@ def test_get_buffer(): blob = Blob(tensor_desc, array) assert np.array_equal(blob.buffer, array) - -def test_write_to_buffer_fp32(): - tensor_desc = TensorDesc("FP32", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.float32) +def write_to_buffer(precision, numpy_precision): + tensor_desc = TensorDesc(precision, [1, 3, 127, 127], "NCHW") + array = np.zeros(shape=(1, 3, 127, 127), dtype=numpy_precision) blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.float32) + ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=numpy_precision) blob.buffer[:] = ones_arr assert np.array_equal(blob.buffer, ones_arr) +def test_write_to_buffer_fp32(): + write_to_buffer("FP32", np.float32) + def test_write_to_buffer_fp64(): - tensor_desc = TensorDesc("FP64", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.float64) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.float64) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("FP64", np.float64) -@pytest.mark.skip(reason="Need to figure out how to implement right conversion") def test_write_to_buffer_fp16(): - tensor_desc = TensorDesc("FP16", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.float16) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.float16) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("FP16", np.float16) def test_write_to_buffer_int8(): - tensor_desc = TensorDesc("I8", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.int8) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.int8) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("I8", np.int8) def test_write_to_buffer_uint8(): - tensor_desc = TensorDesc("U8", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.uint8) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.uint8) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("U8", np.uint8) def test_write_to_buffer_int32(): - tensor_desc = TensorDesc("I32", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.int32) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.int32) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("I32", np.int32) def test_write_to_buffer_int16(): - tensor_desc = TensorDesc("I16", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.int16) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.int16) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("I16", np.int16) def test_write_to_buffer_uint16(): - tensor_desc = TensorDesc("U16", [1, 3, 127, 127], "NCHW") - 
array = np.zeros(shape=(1, 3, 127, 127), dtype=np.uint16) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.uint16) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("U16", np.uint16) def test_write_to_buffer_int64(): - tensor_desc = TensorDesc("I64", [1, 3, 127, 127], "NCHW") - array = np.zeros(shape=(1, 3, 127, 127), dtype=np.int64) - blob = Blob(tensor_desc, array) - ones_arr = np.ones(shape=(1, 3, 127, 127), dtype=np.int64) - blob.buffer[:] = ones_arr - assert np.array_equal(blob.buffer, ones_arr) + write_to_buffer("I64", np.int64) def test_write_numpy_scalar_int64(): diff --git a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py index e3c52497814e1f..a1192fe64e9ccf 100644 --- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py @@ -247,3 +247,61 @@ def test_multi_out_data(): assert net.outputs["28/Reshape"].name == "28/Reshape" and net.outputs["28/Reshape"].shape == [1, 5184] assert net.outputs["fc_out"].name == "fc_out" and net.outputs["fc_out"].shape == [1, 10] pass + +def test_tensor_names(): + model = """ + + + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + + """ + ie = IECore() + weights = b'' + net = ie.read_network(model=model.encode('utf-8'), weights=weights, init_from_buffer=True) + assert net.get_ov_name_for_tensor("relu_t") == "activation" + assert net.get_ov_name_for_tensor("identity_t") == "activation" + assert net.get_ov_name_for_tensor("input") == "in1" + assert net.get_ov_name_for_operation("output") == "activation" diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/include/cpp/ie_cnn_network.h index 9544646a6d089d..8fc28ec41351d0 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/include/cpp/ie_cnn_network.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -189,6 +189,32 @@ class INFERENCE_ENGINE_API_CLASS(CNNNetwork) { */ void serialize(const std::string& xmlPath, const std::string& binPath = {}) const; + /** + * @brief Method maps framework tensor name to OpenVINO name + * + * @param orig_name Framework tensor name + * + * @return OpenVINO name + */ + std::string getOVNameForTensor(const std::string& orig_name) const { + std::string ov_name; + CALL_STATUS_FNC(getOVNameForTensor, ov_name, orig_name); + return ov_name; + } + + /** + * @brief Method maps framework operator name to OpenVINO name + * + * @param orig_name Framework operation name + * + * @return OpenVINO name + */ + std::string getOVNameForOperation(const std::string& orig_name) const { + std::string ov_name; + CALL_STATUS_FNC(getOVNameForOperation, ov_name, orig_name); + return ov_name; + } + protected: IE_SUPPRESS_DEPRECATED_START /** diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/include/cpp/ie_infer_request.hpp index bb635b05c45c63..ea13c956948b30 100644 --- a/inference-engine/include/cpp/ie_infer_request.hpp +++ b/inference-engine/include/cpp/ie_infer_request.hpp @@ -241,7 +241,7 @@ class InferRequest { auto res = actual->Wait(millis_timeout, &resp); if (res != OK && res != RESULT_NOT_READY && res != INFER_NOT_STARTED && res != INFER_CANCELLED) { - InferenceEngine::details::extract_exception(res, resp.msg); 
+ THROW_IE_EXCEPTION << InferenceEngine::details::as_status << res << resp.msg; } return res; } diff --git a/inference-engine/include/gpu/gpu_params.hpp b/inference-engine/include/gpu/gpu_params.hpp index 43d93bf8d69780..308f0af6f78963 100644 --- a/inference-engine/include/gpu/gpu_params.hpp +++ b/inference-engine/include/gpu/gpu_params.hpp @@ -101,7 +101,7 @@ DECLARE_GPU_PARAM_KEY(MEM_HANDLE, gpu_handle_param); * @brief This key identifies video decoder surface handle * in a shared memory blob parameter map */ -#ifdef WIN32 +#ifdef _WIN32 DECLARE_GPU_PARAM_KEY(DEV_OBJECT_HANDLE, gpu_handle_param); #else DECLARE_GPU_PARAM_KEY(DEV_OBJECT_HANDLE, uint32_t); diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie_common.h index e9d228d6653c06..cb04cf049a2f0b 100644 --- a/inference-engine/include/ie_common.h +++ b/inference-engine/include/ie_common.h @@ -83,6 +83,7 @@ enum Layout : uint8_t { // Single image layouts CHW = 128, //!< A single image layout (e.g. for mean image) + HWC = 129, //!< A single image layout (e.g. for mean image) // 2D HW = 192, //!< HW 2D layout @@ -113,6 +114,7 @@ inline std::ostream& operator<<(std::ostream& out, const Layout& p) { PRINT_LAYOUT(OIHW); PRINT_LAYOUT(C); PRINT_LAYOUT(CHW); + PRINT_LAYOUT(HWC); PRINT_LAYOUT(HW); PRINT_LAYOUT(NC); PRINT_LAYOUT(CN); diff --git a/inference-engine/include/ie_compound_blob.h b/inference-engine/include/ie_compound_blob.h index 526402b9dfd85e..b52347a7b6175e 100644 --- a/inference-engine/include/ie_compound_blob.h +++ b/inference-engine/include/ie_compound_blob.h @@ -289,8 +289,8 @@ class INFERENCE_ENGINE_API_CLASS(BatchedBlob) : public CompoundBlob { * @brief Constructs a batched blob from a vector of blobs * @details All passed blobs should meet following requirements: * - all blobs have equal tensor descriptors, - * - blobs layouts should be one of: NCHW, NHWC, NCDHW, NDHWC, NC, CN, C, CHW - * - batch dimensions should be equal to 1 or not defined (C, CHW). + * - blobs layouts should be one of: NCHW, NHWC, NCDHW, NDHWC, NC, CN, C, CHW, HWC + * - batch dimensions should be equal to 1 or not defined (C, CHW, HWC). * Resulting blob's tensor descriptor is constructed using tensor descriptors * of passed blobs by setting batch dimension to blobs.size() * @@ -302,8 +302,8 @@ class INFERENCE_ENGINE_API_CLASS(BatchedBlob) : public CompoundBlob { * @brief Constructs a batched blob from a vector of blobs * @details All passed blobs should meet following requirements: * - all blobs have equal tensor descriptors, - * - blobs layouts should be one of: NCHW, NHWC, NCDHW, NDHWC, NC, CN, C, CHW - * - batch dimensions should be equal to 1 or not defined (C, CHW). + * - blobs layouts should be one of: NCHW, NHWC, NCDHW, NDHWC, NC, CN, C, CHW, HWC + * - batch dimensions should be equal to 1 or not defined (C, CHW, HWC). * Resulting blob's tensor descriptor is constructed using tensor descriptors * of passed blobs by setting batch dimension to blobs.size() * diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/include/ie_icnn_network.hpp index 946e3044a30daf..2c6b5bea3ff2f0 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/include/ie_icnn_network.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -69,9 +69,11 @@ class INFERENCE_ENGINE_ICNNNETWORK_CLASS(ICNNNetwork) : public details::IRelease * * For single and multiple outputs networks. 
* - * This method need to be called to find output names for using them later + * This method need to be called to find out OpenVINO output names for using them later * when calling InferenceEngine::InferRequest::GetBlob or InferenceEngine::InferRequest::SetBlob * + * If you want to use framework names, you can use InferenceEngine::ICNNNetwork::getOVNameForTensor or + * InferenceEngine::ICNNNetwork::getOVNameForOperation methods to map framework names to OpenVINO names * * @param out Reference to the OutputsDataMap object */ @@ -82,9 +84,12 @@ class INFERENCE_ENGINE_ICNNNETWORK_CLASS(ICNNNetwork) : public details::IRelease * object. * * For single and multiple inputs networks. - * This method need to be called to find out input names for using them later + * This method need to be called to find out OpenVINO input names for using them later * when calling InferenceEngine::InferRequest::SetBlob * + * If you want to use framework names, you can use InferenceEngine::ICNNNetwork::getOVNameForTensor or + * InferenceEngine::ICNNNetwork::getOVNameForOperation methods to map framework names to OpenVINO names + * * @param inputs Reference to InputsDataMap object. */ virtual void getInputsInfo(InputsDataMap& inputs) const noexcept = 0; @@ -179,6 +184,38 @@ class INFERENCE_ENGINE_ICNNNETWORK_CLASS(ICNNNetwork) : public details::IRelease virtual StatusCode serialize(const std::string& xmlPath, const std::string& binPath, ResponseDesc* resp) const noexcept = 0; + /** + * @brief Methods maps framework tensor name to OpenVINO name + * + * @param ov_name OpenVINO name + * @param orig_name Framework tensor name + * @param resp Pointer to the response message that holds a description of an error if any occurred + * + * @return Status code of the operation + */ + virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { + (void) ov_name; + (void) orig_name; + (void) resp; + return NOT_IMPLEMENTED; + } + + /** + * @brief Methods maps framework operation name to OpenVINO name + * + * @param ov_name OpenVINO name + * @param orig_name Framework operation name + * @param resp Pointer to the response message that holds a description of an error if any occurred + * + * @return Status code of the operation + */ + virtual StatusCode getOVNameForOperation(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { + (void) ov_name; + (void) orig_name; + (void) resp; + return NOT_IMPLEMENTED; + } + /** * @brief A virtual destructor. */ diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/include/ie_input_info.hpp index 5d6b8f8680383b..fe2d92b4e3ee13 100644 --- a/inference-engine/include/ie_input_info.hpp +++ b/inference-engine/include/ie_input_info.hpp @@ -70,6 +70,7 @@ class InputInfo { * NC - for 2-dimensional, * CHW - for 3-dimensional, * NCHW - for 4-dimensional + * NCDHW - for 5-dimensional * The default input layout might be changed preferred one using setLayout() function. * @return The precision used for input blob creation */ diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md index 41f48e4735d886..3bba703c68bb71 100644 --- a/inference-engine/samples/benchmark_app/README.md +++ b/inference-engine/samples/benchmark_app/README.md @@ -4,6 +4,12 @@ This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learn > **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. 
For the Python* implementation, refer to [Benchmark Python* Tool](../../tools/benchmark_tool/README.md). +> **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). +> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare +> performance of deep learning models on various Intel® architecture +> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components. +>
+> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. ## How It Works @@ -43,6 +49,7 @@ The application also saves executable graph information serialized to an XML fil ## Run the Tool + Note that the benchmark_app usually produces optimal performance for any device out of the box. **So in most cases you don't need to play the app options explicitly and the plain device name is enough**, for example, for CPU: @@ -115,7 +122,7 @@ If a model has only image input(s), please provide a folder with images or a pat If a model has some specific input(s) (not images), please prepare a binary file(s) that is filled with data of appropriate precision and provide a path to them as input. If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one. -To run the tool, you can use public or Intel's pre-trained models. To download the models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the tool, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/samples/classification_sample_async/README.md b/inference-engine/samples/classification_sample_async/README.md index 5d9abb063350f8..32df39493566ed 100644 --- a/inference-engine/samples/classification_sample_async/README.md +++ b/inference-engine/samples/classification_sample_async/README.md @@ -49,7 +49,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To run the sample, use AlexNet and GoogLeNet or other public or pre-trained image classification models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, use AlexNet and GoogLeNet or other public or pre-trained image classification models. You can download [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/samples/hello_classification/README.md b/inference-engine/samples/hello_classification/README.md index f390cf9a874b8a..1244e68343a770 100644 --- a/inference-engine/samples/hello_classification/README.md +++ b/inference-engine/samples/hello_classification/README.md @@ -19,7 +19,7 @@ Refer to [Integrate the Inference Engine New Request API with Your Application]( ## Running -To run the sample, you can use public or pre-trained models. 
To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/samples/hello_nv12_input_classification/README.md b/inference-engine/samples/hello_nv12_input_classification/README.md index b80a5a86a59121..5d781bc66923c7 100644 --- a/inference-engine/samples/hello_nv12_input_classification/README.md +++ b/inference-engine/samples/hello_nv12_input_classification/README.md @@ -35,9 +35,7 @@ ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv ## Running -To run the sample, you can use public or pre-trained models. To download pre-trained models, use -the OpenVINO™ [Model Downloader](@ref omz_tools_downloader_README) -or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the > Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). diff --git a/inference-engine/samples/hello_reshape_ssd/README.md b/inference-engine/samples/hello_reshape_ssd/README.md index ae14ddcc5a92c2..4392a3eafcf369 100644 --- a/inference-engine/samples/hello_reshape_ssd/README.md +++ b/inference-engine/samples/hello_reshape_ssd/README.md @@ -7,7 +7,7 @@ networks like SSD-VGG. The sample shows how to use [Shape Inference feature](../ ## Running -To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/samples/object_detection_sample_ssd/README.md b/inference-engine/samples/object_detection_sample_ssd/README.md index b0a4f4e84652f9..46849d90bfecc2 100644 --- a/inference-engine/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/samples/object_detection_sample_ssd/README.md @@ -36,7 +36,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To run the sample, you can use public or pre-trained models. 
To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). > diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp index b2932ece759010..dced4a7f9abbaa 100644 --- a/inference-engine/samples/speech_sample/main.cpp +++ b/inference-engine/samples/speech_sample/main.cpp @@ -269,7 +269,7 @@ float StdDevRelError(score_error_t error) { } #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) -#if defined(_WIN32) || defined(WIN32) +#ifdef _WIN32 #include #include #else @@ -281,7 +281,7 @@ float StdDevRelError(score_error_t error) { inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { size_t level = *eax; -#if defined(_WIN32) || defined(WIN32) +#ifdef _WIN32 int regs[4] = {static_cast(*eax), static_cast(*ebx), static_cast(*ecx), static_cast(*edx)}; __cpuid(regs, level); *eax = static_cast(regs[0]); diff --git a/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp b/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp index 09032abd9148e8..523187dc64b4ce 100644 --- a/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp @@ -53,7 +53,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) { CheckAndReturnError(m_kernelEntry.length() == 0, "No Kernel entry in layer: " << GetStrAttr(node.parent(), "name")); // Handle Source nodes - for (auto sourceNode = node.child("Source"); !sourceNode.empty(); sourceNode = sourceNode.next_sibling("Source")) { + FOREACH_CHILD(sourceNode, node, "Source") { // open file std::string filename = m_configDir + "/" + GetStrAttr(sourceNode, "filename", ""); std::ifstream inputFile(filename); @@ -74,7 +74,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) { } // Handle Define nodes - for (auto defineNode = node.child("Define"); !defineNode.empty(); defineNode = defineNode.next_sibling("Define")) { + FOREACH_CHILD(defineNode, node, "Define") { KernelDefine kd; kd.name = GetStrAttr(defineNode, "name", ""); CheckAndReturnError((kd.name.length() == 0), "Missing name for define node"); @@ -91,7 +91,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) { void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) { CheckNodeTypeAndReturnError(node, "Buffers"); - for (auto tensorNode = node.child("Tensor"); !tensorNode.empty(); tensorNode = tensorNode.next_sibling("Tensor")) { + FOREACH_CHILD(tensorNode, node, "Tensor") { KerenlParam kp; kp.format = FormatFromString(GetStrAttr(tensorNode, "format", "BFYX")); CheckAndReturnError(kp.format == cldnn::format::format_num, "Tensor node has an invalid format: " << GetStrAttr(tensorNode, "format")); @@ -109,7 +109,7 @@ void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) { } m_kernelParams.push_back(kp); } - for (auto dataNode = 
node.child("Data"); !dataNode.empty(); dataNode = dataNode.next_sibling("Data")) { + FOREACH_CHILD(dataNode, node, "Data") { KerenlParam kp; kp.type = ParamType::Data; kp.paramIndex = GetIntAttr(dataNode, "arg-index", -1); diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index c39cdc20433f45..f82e866630ddb8 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -30,6 +30,7 @@ #include #include +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -60,9 +62,11 @@ #include #include +#include #include #include #include +#include #include "cldnn_engine.h" #include "cldnn_executable_network.h" @@ -135,8 +139,15 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc // Disable shape inference (WA for generic operations) ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc); + bool enableInt8; { ngraph::pass::Manager manager; + enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); + if (enableInt8) { + manager.register_pass( + std::vector{ ngraph::element::i8, ngraph::element::u8 }); + } + manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -271,10 +282,19 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc pass_config->enable(); + if (enableInt8) { + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node); + }); + + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(node); + }); + } + manager.run_passes(nGraphFunc); } - bool enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); if (enableInt8) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT"); using namespace ngraph::pass::low_precision; @@ -445,11 +465,11 @@ RemoteContext::Ptr clDNNEngine::CreateContext(const ParamMap& params) { auto context = std::make_shared(shared_from_this(), params, _impl->m_config); return std::dynamic_pointer_cast(context); } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) { - #ifdef WIN32 +#ifdef _WIN32 auto context = std::make_shared(shared_from_this(), params, _impl->m_config); - #else +#else auto context = std::make_shared(shared_from_this(), params, _impl->m_config); - #endif +#endif return std::dynamic_pointer_cast(context); } else { THROW_IE_EXCEPTION << "Invalid remote context type" << contextTypeStr; diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index bff05e90077ff3..340853cf44e5d0 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -591,8 +591,9 @@ void CLDNNGraph::UpdateImplementationsMap() { } } -void CLDNNGraph::GetPerformanceCounts(std::map &result) const { +std::map CLDNNGraph::GetPerformanceCounts() const { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, 
"CLDNNGraph::GetPerformanceCounts"); + std::map result; bool combinePrimByIRLayers = false; unsigned i = 0; auto allIds = GetNetwork()->get_all_primitive_org_ids(); @@ -738,6 +739,7 @@ void CLDNNGraph::GetPerformanceCounts(std::map CLDNNGraph::GetNetwork(size_t idx) const { diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index 40dc4bab68b971..45a4510843defc 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -38,7 +38,7 @@ class CLDNNGraph { bool IsLoaded() const; - void GetPerformanceCounts(std::map& perfMap) const; + std::map GetPerformanceCounts() const; void UpdatePerfStatistics(); const Config& getConfig() const { return m_config; } diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index 0ce5dfbcb47b3f..2e7b1b00d9a1dd 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -421,8 +421,9 @@ void CLDNNInferRequest::checkBlobs() { } } -void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) { +Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob"); + Blob::Ptr data; InputInfo::Ptr foundInput; DataPtr foundOutput; bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); @@ -440,13 +441,14 @@ void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) { data = _outputs[name]; checkOutputBlob(data, name, foundOutput); } + return data; } -void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) { +void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob"); // perform all common checks first - if (name == nullptr) { + if (name.empty()) { THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name"; } if (!data) @@ -884,13 +886,12 @@ void CLDNNInferRequest::InferImpl() { } } -void CLDNNInferRequest::GetPerformanceCounts( - std::map &perfMap) const { +std::map CLDNNInferRequest::GetPerformanceCounts() const { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts"); if (!m_useProfiling) { THROW_IE_EXCEPTION << "Performance counters were not enabled"; } else { - m_graph->GetPerformanceCounts(perfMap); + return m_graph->GetPerformanceCounts(); } } diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h index 739be8594dd48f..a557c9b94af303 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h @@ -29,7 +29,7 @@ class CLDNNInferRequest : public InferenceEngine::InferRequestInternal { void checkBlobs() override; void InferImpl() override; - void GetPerformanceCounts(std::map &perfMap) const override; + std::map GetPerformanceCounts() const override; CLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, const std::shared_ptr& execNetwork); @@ -38,8 +38,8 @@ class CLDNNInferRequest : public InferenceEngine::InferRequestInternal { virtual ~CLDNNInferRequest() = default; - void GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) override; - void SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) override; + 
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; void SetBatch(int batch = -1) override; void SetGraph(std::shared_ptr graph); diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp index 8ce7c0ebe86534..c3fb7cad324eb0 100644 --- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp +++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -156,18 +156,17 @@ REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum); REGISTER_FACTORY(v3, EmbeddingBagPackedSum); REGISTER_FACTORY(v3, EmbeddingSegmentsSum); REGISTER_FACTORY(v3, ExtractImagePatches); +REGISTER_FACTORY(v3, ScatterUpdate); +REGISTER_FACTORY(v3, ScatterElementsUpdate); // REGISTER_FACTORY(v3, NonMaxSuppression); Supported via v3 -> v5 internal conversion // ----------------------------- Unsupported v3 ops ----------------------------- // -// REGISTER_FACTORY(v3, ScatterUpdate); // There is the scatter_update primitive, but seems like it produces wrong results // REGISTER_FACTORY(v3, Assign); // REGISTER_FACTORY(v3, Bucketize); // REGISTER_FACTORY(v3, GRUCell); // REGISTER_FACTORY(v3, NonZero); // REGISTER_FACTORY(v3, ROIAlign); // REGISTER_FACTORY(v3, ReadValue); -// REGISTER_FACTORY(v3, ScatterElementsUpdate); -// REGISTER_FACTORY(v3, ScatterUpdate); // REGISTER_FACTORY(v3, ScatterNDUpdate); // REGISTER_FACTORY(v3, ShapeOf); // REGISTER_FACTORY(v3, TopK); diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp index 66d0949fb81940..6cc88ee77df85f 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp @@ -36,7 +36,7 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const { { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, { GPU_PARAM_KEY(MEM_HANDLE), params.mem } }; -#ifdef WIN32 +#ifdef _WIN32 case BT_DX_BUF_SHARED: return{ { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) }, @@ -94,7 +94,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() { case BlobType::BT_BUF_SHARED: m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem))); break; -#ifdef WIN32 +#ifdef _WIN32 case BlobType::BT_SURF_SHARED: m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane))); break; @@ -130,7 +130,7 @@ void CLDNNRemoteBlobImpl::allocate() noexcept { case BlobType::BT_BUF_SHARED: m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem))); break; -#ifdef WIN32 +#ifdef _WIN32 case BlobType::BT_SURF_SHARED: m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane))); break; diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.h b/inference-engine/src/cldnn_engine/cldnn_remote_context.h index 0b52527a244829..a539e2c6b50a84 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.h +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.h @@ -19,10 +19,10 @@ # define NOMINMAX #endif -#ifdef WIN32 -#include +#ifdef _WIN32 +# include #else -#include +# include #endif namespace CLDNNPlugin { @@ -122,7 +122,7 @@ class typedCLDNNRemoteBlob : public 
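
A sketch of the migrated request interface as a plugin would now override it, following the `cldnn_infer_request.h` hunk above. The class name and include path are assumptions for illustration: blob accessors take `std::string` and return `Blob::Ptr`, and performance counters are returned by value instead of being written into an out-parameter.

```cpp
#include <map>
#include <string>
#include <cpp_interfaces/impl/ie_infer_request_internal.hpp>  // plugin API header, path as used by existing plugins

class MyInferRequest : public InferenceEngine::InferRequestInternal {
public:
    using InferenceEngine::InferRequestInternal::InferRequestInternal;

    InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
    void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
    void InferImpl() override;
};
```
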
TpublicAPI { using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob; using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob; -#ifdef WIN32 +#ifdef _WIN32 using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob; using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob; #else @@ -130,7 +130,7 @@ using CLDNNRemoteVASurface = typedCLDNNRemoteBlobas(); if (ptr) return ptr->getImpl(); @@ -257,7 +257,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, } }; -#ifdef WIN32 +#ifdef _WIN32 using surf_key = _Key; #else using surf_key = _Key; @@ -270,7 +270,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, using InferenceEngine::gpu::details::param_map_obj_getter; InferenceEngine::RemoteBlob::Ptr ret = nullptr; uint32_t plane = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(VA_PLANE)); -#ifdef WIN32 +#ifdef _WIN32 cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); surf_key skey(mem, plane); #else @@ -291,7 +291,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, auto smart_this = std::dynamic_pointer_cast (std::enable_shared_from_this>::shared_from_this()); -#ifdef WIN32 +#ifdef _WIN32 ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, plane, CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED); @@ -335,7 +335,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, layout.format = ImageFormatFromLayout(tensorDesc.getLayout()); ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); break; -#ifdef WIN32 +#ifdef _WIN32 case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED: ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); break; @@ -402,7 +402,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) { blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED; mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE)); -#ifdef WIN32 +#ifdef _WIN32 } else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) { blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED; mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); @@ -426,14 +426,14 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, }; using CLDNNRemoteCLContext = typedCLDNNExecutionContext; -#ifdef WIN32 +#ifdef _WIN32 using CLDNNRemoteD3DContext = typedCLDNNExecutionContext; #else using CLDNNRemoteVAContext = typedCLDNNExecutionContext; #endif inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) { -#ifdef WIN32 +#ifdef _WIN32 { auto ptr = ctxPtr->as(); if (ptr) return ptr->getImpl(); diff --git a/inference-engine/src/cldnn_engine/ops/result.cpp b/inference-engine/src/cldnn_engine/ops/result.cpp index 56ad5e9f5c017a..536caf22eb7555 100644 --- a/inference-engine/src/cldnn_engine/ops/result.cpp +++ b/inference-engine/src/cldnn_engine/ops/result.cpp @@ -18,7 +18,9 @@ void CreateResultOp(Program& p, const std::shared_ptr& o p.ValidateInputs(op, {1}); auto prev = op->get_input_node_shared_ptr(0); + NGRAPH_SUPPRESS_DEPRECATED_START auto inputID = op->get_input_source_output(0).get_tensor().get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END if (inputID.empty()) { inputID = prev->get_friendly_name(); if (prev->get_output_size() > 1) { diff --git a/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp b/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp new file mode 100644 index 
00000000000000..5c9002c64aaa44 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn_program.h" +#include "cldnn_common_utils.h" + +#include "ngraph/op/scatter_elements_update.hpp" +#include "ngraph/op/constant.hpp" + +#include "api/scatter_elements_update.hpp" + +namespace CLDNNPlugin { + +static inline cldnn::scatter_elements_update::scatter_elements_update_axis GetScatterElementsUpdateAxis(int axis, unsigned rank) { + if (axis < 0) + axis += rank; + if (axis < 0 || axis >= rank) + THROW_IE_EXCEPTION << "ScatterElementsUpdate axis is not correspond to number of dimensions"; + + // Difference in dimension ordering between IE and clDNN, + // reverse spatial dimensions after batch and feature. + unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(rank, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_b; + case 1: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_f; + case 2: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_x; + case 3: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_y; + case 4: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_z; + case 5: return cldnn::scatter_elements_update::scatter_elements_update_axis::along_w; + default: THROW_IE_EXCEPTION << "Unsupported ScatterElementsUpdate axis: " << axis; + } + + return cldnn::scatter_elements_update::scatter_elements_update_axis::along_f; // shouldn't get here +} + +void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {4}); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + std::string layerName = layer_type_name_ID(op); + + size_t rank = op->get_input_shape(0).size(); + auto axes_constant = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)); + if (!axes_constant) { + THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")"; + } + int32_t axis = axes_constant->cast_vector()[0]; + + auto primitive = cldnn::scatter_elements_update(layerName, + inputPrimitives[0], + inputPrimitives[1], + inputPrimitives[2], + GetScatterElementsUpdateAxis(axis, rank)); + + p.AddPrimitive(primitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v3, ScatterElementsUpdate); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/ops/split.cpp b/inference-engine/src/cldnn_engine/ops/split.cpp index 65cbf59873b831..3639a3c583a2e5 100644 --- a/inference-engine/src/cldnn_engine/ops/split.cpp +++ b/inference-engine/src/cldnn_engine/ops/split.cpp @@ -24,6 +24,7 @@ void CreateCommonSplitOp(Program& p, const std::shared_ptr& op) { for (size_t i = 0; i < op->get_output_size(); i++) { std::string outLayerName = layerName + (is_single_out_split ? "" : "." 
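
For reference, the axis conversion in the new `scatter_elements_update.cpp` above works out to the following mapping; this is an illustrative summary worked out from `GetScatterElementsUpdateAxis`, not part of the patch:

```cpp
// nGraph axis -> clDNN axis after GetScatterElementsUpdateAxis
// (negative axes are first normalized by adding the input rank):
//
//   rank 4:  0 -> along_b, 1 -> along_f, 2 -> along_y, 3 -> along_x
//   rank 5:  0 -> along_b, 1 -> along_f, 2 -> along_z, 3 -> along_y, 4 -> along_x
//   rank 6:  0 -> along_b, 1 -> along_f, 2 -> along_w, 3 -> along_z, 4 -> along_y, 5 -> along_x
//
// Batch and feature keep their positions; spatial axes are reversed to match
// clDNN's bfyx / bfzyx / bfwzyx dimension ordering.
```
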
+ std::to_string(i)); const auto outLayerDims = op->get_output_shape(i); + NGRAPH_SUPPRESS_DEPRECATED_START if (outLayerDims.size() != startOffset.size()) { THROW_IE_EXCEPTION << "Invalid dimesions in split layer: " << op->get_friendly_name() << " output: " << op->get_output_tensor_name(i); @@ -34,6 +35,7 @@ void CreateCommonSplitOp(Program& p, const std::shared_ptr& op) { << " output: " << op->get_output_tensor_name(i); } } + NGRAPH_SUPPRESS_DEPRECATED_END auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1); auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0); diff --git a/inference-engine/src/gna_plugin/gna_infer_request.hpp b/inference-engine/src/gna_plugin/gna_infer_request.hpp index 52b9c0fdd24e49..9b04e759d813d4 100644 --- a/inference-engine/src/gna_plugin/gna_infer_request.hpp +++ b/inference-engine/src/gna_plugin/gna_infer_request.hpp @@ -67,9 +67,8 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal { * Note: not all plugins may provide meaningful data * @param perfMap - a map of layer names to profiling information for that layer. */ - void GetPerformanceCounts(std::map &perfMap) const override { - plg->GetPerformanceCounts(perfMap); + std::map GetPerformanceCounts() const override { + return plg->GetPerformanceCounts(); } /** diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index e500722d5e6669..c8e3c225cfb8dd 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -1451,9 +1451,13 @@ void GNAPlugin::Export(const std::string &fileName) { serial.Export(gnamem->getBasePtr(), gnamem->getTotalBytes(), outStream); } -void GNAPlugin::GetPerformanceCounts(std::map &perfMap) { +std::map GNAPlugin::GetPerformanceCounts() { if (gnaFlags->performance_counting) { + std::map perfMap; gnadevice->getGnaPerfCounters(perfMap); + return perfMap; + } else { + return {}; } } diff --git a/inference-engine/src/gna_plugin/gna_plugin.hpp b/inference-engine/src/gna_plugin/gna_plugin.hpp index 6c5acd33eaef49..6441e8e5f5e762 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.hpp +++ b/inference-engine/src/gna_plugin/gna_plugin.hpp @@ -102,7 +102,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin { void LoadNetwork(InferenceEngine::CNNNetwork &network); bool Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result); - void GetPerformanceCounts(std::map &perfMap); + std::map GetPerformanceCounts(); void AddExtension(InferenceEngine::IExtensionPtr extension) override; void SetConfig(const std::map &config) override; diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp index 7c5555e4cb78db..6241118511a7ce 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp @@ -383,6 +383,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo if (itClonedInput != clonedInputs.end() && nullptr != itClonedInput->second) { itClonedInput->second->getPreProcess() = externalInput.second->getPreProcess(); itClonedInput->second->setPrecision(externalInput.second->getPrecision()); + itClonedInput->second->setLayout(externalInput.second->getLayout()); } } isInputSubnetwork[id] = std::any_of(std::begin(subgraph._parameters), @@ -440,28 +441,24 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& std::unordered_set 
networkInputs; pugi::xml_node inputsNode = heteroNode.child("inputs"); - for (auto inputNode = inputsNode.child("input"); !inputNode.empty(); - inputNode = inputNode.next_sibling("input")) { + FOREACH_CHILD(inputNode, inputsNode, "input") { networkInputs.insert(GetStrAttr(inputNode, "name")); } std::unordered_set networkOutputs; pugi::xml_node outputsNode = heteroNode.child("outputs"); - for (auto outputNode = outputsNode.child("output"); !outputNode.empty(); - outputNode = outputNode.next_sibling("output")) { + FOREACH_CHILD(outputNode, outputsNode, "output") { networkOutputs.insert(GetStrAttr(outputNode, "name")); } Engine::Configs importedConfigs; auto configsNode = heteroNode.child("configs"); - for (auto configNode = configsNode.child("config"); !configNode.empty(); - configNode = configNode.next_sibling("config")) { + FOREACH_CHILD(configNode, configsNode, "config") { importedConfigs.emplace(GetStrAttr(configNode, "key"), GetStrAttr(configNode, "value")); } auto blobNamesNode = heteroNode.child("blob_names_map"); - for (auto blobNameNode = blobNamesNode.child("blob_name_map"); !blobNameNode.empty(); - blobNameNode = blobNameNode.next_sibling("blob_name_map")) { + FOREACH_CHILD(blobNameNode, blobNamesNode, "blob_name_map") { _blobNameMap.emplace(GetStrAttr(blobNameNode, "key"), GetStrAttr(blobNameNode, "value")); } @@ -471,8 +468,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& std::vector descs; pugi::xml_node subnetworksNode = heteroNode.child("subnetworks"); - for (auto subnetworkNode = subnetworksNode.child("subnetwork"); !subnetworkNode.empty(); - subnetworkNode = subnetworkNode.next_sibling("subnetwork")) { + FOREACH_CHILD(subnetworkNode, subnetworksNode, "subnetwork") { auto deviceName = GetStrAttr(subnetworkNode, "device"); auto metaDevices = _heteroPlugin->GetDevicePlugins(deviceName, importedConfigs); @@ -507,16 +503,14 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& cnnnetwork = _heteroPlugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob)); auto inputs = cnnnetwork.getInputsInfo(); auto inputsNode = subnetworkNode.child("inputs"); - for (auto inputNode = inputsNode.child("input"); !inputNode.empty(); - inputNode = inputNode.next_sibling("input")) { + FOREACH_CHILD(inputNode, inputsNode, "input") { auto inputName = GetStrAttr(inputNode, "name"); inputs[inputName]->setPrecision(Precision::FromStr(GetStrAttr(inputNode, "precision"))); } auto outputs = cnnnetwork.getOutputsInfo(); auto outputsNode = subnetworkNode.child("outputs"); - for (auto outputNode = outputsNode.child("output"); !outputNode.empty(); - outputNode = outputNode.next_sibling("output")) { + FOREACH_CHILD(outputNode, outputsNode, "output") { auto outputName = GetStrAttr(outputNode, "name"); outputs[outputName]->setPrecision(Precision::FromStr(GetStrAttr(outputNode, "precision"))); } diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp index 94be11eb594ece..9d831f7703082f 100644 --- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp +++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp @@ -64,7 +64,7 @@ HeteroInferRequest::HeteroInferRequest(InferenceEngine::InputsDataMap networkInp } } -void HeteroInferRequest::SetBlob(const char* name, const InferenceEngine::Blob::Ptr& data) { +void HeteroInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) { InferenceEngine::InferRequestInternal::SetBlob(name, data); 
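
The explicit pugixml sibling loops replaced above (in `cldnn_custom_layer.cpp` and the HETERO import code) all follow the same pattern, so the `FOREACH_CHILD` helper presumably expands to something along these lines; the real definition lives in the shared XML parsing utilities and is not shown in this patch:

```cpp
// Sketch of the assumed expansion of FOREACH_CHILD(child, parent, tag):
#define FOREACH_CHILD(child, parent, tag) \
    for (auto child = (parent).child(tag); !child.empty(); child = child.next_sibling(tag))

// Usage equivalent to the loops being replaced:
//   FOREACH_CHILD(inputNode, inputsNode, "input") {
//       networkInputs.insert(GetStrAttr(inputNode, "name"));
//   }
```
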
assert(!_inferRequests.empty()); for (auto &&desc : _inferRequests) { @@ -95,14 +95,15 @@ void HeteroInferRequest::InferImpl() { } } -void HeteroInferRequest::GetPerformanceCounts(std::map &perfMap) const { - perfMap.clear(); +std::map HeteroInferRequest::GetPerformanceCounts() const { + std::map perfMap; for (size_t i = 0; i < _inferRequests.size(); i++) { auto perfMapRequest = _inferRequests[i]._request->GetPerformanceCounts(); for (auto &&r : perfMapRequest) { perfMap[std::string("subgraph") + std::to_string(i) + ": " + r.first] = r.second; } } + return perfMap; } void HeteroInferRequest::updateInOutIfNeeded() { diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.hpp b/inference-engine/src/hetero_plugin/hetero_infer_request.hpp index 069e36339151d2..aee93ced131e21 100644 --- a/inference-engine/src/hetero_plugin/hetero_infer_request.hpp +++ b/inference-engine/src/hetero_plugin/hetero_infer_request.hpp @@ -40,9 +40,9 @@ class HeteroInferRequest : public InferenceEngine::InferRequestInternal { void InferImpl() override; - void SetBlob(const char* name, const InferenceEngine::Blob::Ptr& data) override; + void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override; - void GetPerformanceCounts(std::map &perfMap) const override; + std::map GetPerformanceCounts() const override; void updateInOutIfNeeded(); diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp index 2355a5674ab20c..02f8d2fc3fd8e8 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -65,7 +65,7 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph:: case 2: return l == Layout::CN || l == Layout::HW || l == Layout::NC; case 3: - return l == Layout::CHW; + return l == Layout::CHW || l == Layout::HWC; case 4: return l == Layout::NCHW || l == Layout::NHWC; case 5: @@ -122,6 +122,12 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( std::string outName = layer->get_friendly_name(); IE_ASSERT(layer->get_output_size() == 1); // Parameter as only singly output port + // map original names to OpenVINO name + _opNames[outName] = outName; + for (const auto& name : layer->get_output_tensor(0).get_names()) { + _tensorNames[name] = outName; + } + DataPtr& ptr = _data[outName]; IE_ASSERT(ptr); // Data must be allocated after the reshape method @@ -139,7 +145,10 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( } CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) { - if (network.getFunction() == nullptr) { + IE_SUPPRESS_DEPRECATED_START + const ICNNNetwork& iNetwork = network; + const auto net = dynamic_cast(&iNetwork); + if (network.getFunction() == nullptr || !net) { THROW_IE_EXCEPTION << "Cannot create CNNNetwork with nGraph from legacy network format!"; } @@ -147,6 +156,9 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) { InputsDataMap inputs = network.getInputsInfo(); OutputsDataMap outputs = network.getOutputsInfo(); + _opNames = net->_opNames; + _tensorNames = net->_tensorNames; + for (const auto& outputInfo : outputs) { const auto& name = outputInfo.second->getName(); DataPtr output = std::make_shared(name, outputInfo.second->getTensorDesc()); @@ -164,6 +176,7 @@ 
CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) { info->setLayout(inputInfo.second->getLayout()); _inputData[name] = info; } + IE_SUPPRESS_DEPRECATED_END } void CNNNetworkNGraphImpl::setInputInfo(InputInfo::Ptr data) { @@ -204,19 +217,22 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t try { for (const auto & layer : _ngraph_function->get_ops()) { - if (layer->get_friendly_name() == layerName) { + // Result can have the same name as previous operation + if (layer->get_friendly_name() == layerName && !std::dynamic_pointer_cast(layer)) { + std::string outputName = layerName; + if (layer->outputs().size() != 1) { + outputName += "." + std::to_string(outputIndex); + } + // Check that we don't have a result for the output port for (const auto& port : layer->output(outputIndex).get_target_inputs()) { if (dynamic_cast(port.get_node())) return OK; } auto result = make_shared<::ngraph::op::Result>(layer->output(outputIndex)); + result->set_friendly_name(outputName); _ngraph_function->add_results({result}); - std::string outputName = layerName; - if (layer->outputs().size() != 1) { - outputName += "." + std::to_string(outputIndex); - } if (_outputData.count(outputName) == 0) { reshape(); } @@ -237,6 +253,17 @@ void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node> & ou createDataForResult(output, dataName, data); _data[dataName] = data; _outputData[dataName] = data; + + // Save original framework names + for (const auto& name : output.get_tensor().get_names()) { + _tensorNames[name] = dataName; + } + for (const auto consumerInput : output.get_target_inputs()) { + const auto &consumerLayer = consumerInput.get_node()->shared_from_this(); + if (std::dynamic_pointer_cast(consumerLayer)) { + _opNames[consumerLayer->get_friendly_name()] = dataName; + } + } } size_t CNNNetworkNGraphImpl::getBatchSize() const noexcept { @@ -306,6 +333,7 @@ CNNNetworkNGraphImpl::reshape(const std::map>& auto params = _ngraph_function->get_parameters(); + bool parameter_replaced = false; for (size_t i = 0; i < params.size(); i++) { const auto& param = params[i]; if (inputShapes.find(param->get_friendly_name()) == inputShapes.end()) @@ -314,23 +342,35 @@ CNNNetworkNGraphImpl::reshape(const std::map>& auto newParam = std::make_shared<::ngraph::op::Parameter>(param->get_element_type(), shape); newParam->set_friendly_name(param->get_friendly_name()); _ngraph_function->replace_parameter(i, newParam); + parameter_replaced = true; } - _ngraph_function->validate_nodes_and_infer_types(); + if (parameter_replaced) + _ngraph_function->validate_nodes_and_infer_types(); + + const auto& results = _ngraph_function->get_results(); + bool outputs_are_static = all_of( + begin(results), end(results), + [](const std::shared_ptr& n){ return n->get_output_partial_shape(0).is_static(); }); { - auto specialized_ngraph_function = cloneFunction(false); - { - OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetworkNGraphImpl::ConvertToLegacy"); - ::ngraph::pass::Manager manager; - // resolves dynamism by replacing dynamic operation with static version - manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); - manager.register_pass<::ngraph::pass::ConstantFolding>(); - // OneHotToLegacy changes output precision - manager.register_pass<::ngraph::pass::ConvertOneHotToOneHotIEMatcher>()->detect_output_type( - specialized_ngraph_function); - manager.run_passes(specialized_ngraph_function); + shared_ptr specialized_ngraph_function = nullptr; + if (outputs_are_static) { + 
specialized_ngraph_function = _ngraph_function; + } else { + specialized_ngraph_function = cloneFunction(false); + { + OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetworkNGraphImpl::ConvertToLegacy"); + ::ngraph::pass::Manager manager; + // resolves dynamism by replacing dynamic operation with static version + manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); + manager.register_pass<::ngraph::pass::ConstantFolding>(); + // OneHotToLegacy changes output precision + manager.register_pass<::ngraph::pass::ConvertOneHotToOneHotIEMatcher>()->detect_output_type( + specialized_ngraph_function); + manager.run_passes(specialized_ngraph_function); + } + specialized_ngraph_function->validate_nodes_and_infer_types(); } - specialized_ngraph_function->validate_nodes_and_infer_types(); #if 0 for (const auto &op : specialized_ngraph_function->get_ordered_ops()) { @@ -391,7 +431,7 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath, ResponseDesc* resp) const noexcept { try { std::map custom_opsets; - for (auto extension : _ie_extensions) { + for (const auto& extension : _ie_extensions) { auto opset = extension->getOpSets(); custom_opsets.insert(begin(opset), end(opset)); } @@ -410,6 +450,20 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath, return OK; } +StatusCode CNNNetworkNGraphImpl::getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { + if (_tensorNames.find(orig_name) == _tensorNames.end()) + return DescriptionBuffer(NOT_FOUND, resp) << "Framework tensor with name \"" << orig_name << "\" was not mapped to OpenVINO data!"; + ov_name = _tensorNames.at(orig_name); + return OK; +} + +StatusCode CNNNetworkNGraphImpl::getOVNameForOperation(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { + if (_opNames.find(orig_name) == _opNames.end()) + return DescriptionBuffer(NOT_FOUND, resp) << "Framework operation with name \"" << orig_name << "\" was not mapped to OpenVINO data!"; + ov_name = _opNames.at(orig_name); + return OK; +} + StatusCode CNNNetworkNGraphImpl::setBatchSize(size_t size, ResponseDesc* responseDesc) noexcept { try { if (getBatchSize() == size) return OK; diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp index 7d3070afaec472..7778ec8ae82424 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -81,6 +82,10 @@ class INFERENCE_ENGINE_API_CLASS(CNNNetworkNGraphImpl): public ICNNNetwork { StatusCode serialize(const std::string& xmlPath, const std::string& binPath, ResponseDesc* resp) const noexcept override; + StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept override; + + StatusCode getOVNameForOperation(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept override; + // used by convertFunctionToICNNNetwork from legacy library std::map _data; protected: @@ -91,6 +96,8 @@ class INFERENCE_ENGINE_API_CLASS(CNNNetworkNGraphImpl): public ICNNNetwork { InferenceEngine::InputsDataMap _inputData; std::map _outputData; const std::vector 
_ie_extensions; + std::unordered_map _opNames; + std::unordered_map _tensorNames; /** * @brief Create DataPtr for nGraph operation diff --git a/inference-engine/src/inference_engine/ie_compound_blob.cpp b/inference-engine/src/inference_engine/ie_compound_blob.cpp index 15e94a952dec27..b8f71250ecf933 100644 --- a/inference-engine/src/inference_engine/ie_compound_blob.cpp +++ b/inference-engine/src/inference_engine/ie_compound_blob.cpp @@ -259,6 +259,10 @@ TensorDesc verifyBatchedBlobInput(const std::vector& blobs) { blobLayout = NCHW; blobDims.insert(blobDims.begin(), blobs.size()); break; + case HWC: + blobLayout = NHWC; + blobDims.insert(blobDims.begin(), blobs.size()); + break; default: THROW_IE_EXCEPTION << "Unsupported sub-blobs layout - to be one of: [NCHW, NHWC, NCDHW, NDHWC, NC, CN, C, CHW]"; } diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index b082d5a50b4203..b5bb82116cb82b 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -192,8 +192,7 @@ class Core::Impl : public ICore { pugi::xml_node ieNode = xmlDoc.document_element(); pugi::xml_node devicesNode = ieNode.child("plugins"); - for (auto pluginNode = devicesNode.child("plugin"); !pluginNode.empty(); - pluginNode = pluginNode.next_sibling("plugin")) { + FOREACH_CHILD(pluginNode, devicesNode, "plugin") { std::string deviceName = GetStrAttr(pluginNode, "name"); FileUtils::FilePath pluginPath = FileUtils::toFilePath(GetStrAttr(pluginNode, "location").c_str()); @@ -212,8 +211,7 @@ class Core::Impl : public ICore { std::map config; if (propertiesNode) { - for (auto propertyNode = propertiesNode.child("property"); !propertyNode.empty(); - propertyNode = propertyNode.next_sibling("property")) { + FOREACH_CHILD(propertyNode, propertiesNode, "property") { std::string key = GetStrAttr(propertyNode, "key"); std::string value = GetStrAttr(propertyNode, "value"); config[key] = value; @@ -225,8 +223,7 @@ class Core::Impl : public ICore { std::vector listOfExtentions; if (extensionsNode) { - for (auto extensionNode = extensionsNode.child("extension"); !extensionNode.empty(); - extensionNode = extensionNode.next_sibling("extension")) { + FOREACH_CHILD(extensionNode, extensionsNode, "extension") { FileUtils::FilePath extensionLocation = FileUtils::toFilePath(GetStrAttr(extensionNode, "location").c_str()); listOfExtentions.push_back(extensionLocation); } diff --git a/inference-engine/src/inference_engine/ie_layouts.cpp b/inference-engine/src/inference_engine/ie_layouts.cpp index 071dcad53d3e81..5def2480cfdb64 100644 --- a/inference-engine/src/inference_engine/ie_layouts.cpp +++ b/inference-engine/src/inference_engine/ie_layouts.cpp @@ -43,6 +43,8 @@ TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, const case 3: if (blockingDesc.getOrder()[0] == 0 && blockingDesc.getOrder()[1] == 1 && blockingDesc.getOrder()[2] == 2) { layout = Layout::CHW; + } else if (blockingDesc.getOrder()[0] == 1 && blockingDesc.getOrder()[1] == 2 && blockingDesc.getOrder()[2] == 0) { + layout = Layout::HWC; } break; case 4: @@ -123,6 +125,7 @@ void TensorDesc::setLayout(Layout l) { inconsistentLayout = dims.size() != 4; break; case Layout::CHW: + case Layout::HWC: inconsistentLayout = dims.size() != 3; break; case Layout::CN: @@ -319,6 +322,11 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding l_order = {0, 1, 2}; l_dims = dims; break; + case Layout::HWC: + checkDims(dims.size(), 
3); + l_order = {1, 2, 0}; + l_dims = dims; + break; case Layout::CN: checkDims(dims.size(), 2); l_order = {1, 0}; diff --git a/inference-engine/src/legacy_api/include/legacy/details/ie_cnn_network_iterator.hpp b/inference-engine/src/legacy_api/include/legacy/details/ie_cnn_network_iterator.hpp index c91fad03f043eb..f513fdcf5bf1b4 100644 --- a/inference-engine/src/legacy_api/include/legacy/details/ie_cnn_network_iterator.hpp +++ b/inference-engine/src/legacy_api/include/legacy/details/ie_cnn_network_iterator.hpp @@ -32,7 +32,6 @@ class INFERENCE_ENGINE_INTERNAL("Migrate to IR v10 and work with ngraph::Functio CNNNetworkIterator { IE_SUPPRESS_DEPRECATED_START - std::unordered_set visited {}; std::list nextLayersToVisit {}; InferenceEngine::CNNLayerPtr currentLayer = nullptr; const ICNNNetwork* network = nullptr; @@ -56,6 +55,7 @@ CNNNetworkIterator { } return consumers; }; + std::unordered_set visited; auto bfs = [&](const CNNLayerPtr& start_node, bool traverse_via_outputs = false) { if (!start_node || visited.count(start_node.get())) return; std::deque q; @@ -98,16 +98,31 @@ CNNNetworkIterator { } }; + // Find all outputLayers + std::vector outputLayers; + const auto* networkImpl = dynamic_cast(network); + if (networkImpl) { + for (const auto & node : networkImpl->allLayers()) { + if (get_consumers(node.second).empty()) + outputLayers.emplace_back(node.second); + } + } else { + // For backward compatibility + for (const auto& out : outputs) { + outputLayers.emplace_back(getCreatorLayer(out.second).lock()); + } + } // First we run bfs starting from outputs that provides deterministic graph traverse - for (const auto & output : outputs) { - bfs(getCreatorLayer(output.second).lock()); + for (const auto & output : outputLayers) { + bfs(output); } - - // For cases when graph has no outputs we start bfs from inputs to ensure topological sort - for (const auto & input : inputs) { - const auto data_ptr = input.second->getInputData(); - for (const auto & consumer : getInputTo(data_ptr)) - bfs(consumer.second, true); + if (!networkImpl) { + // For cases when graph has no outputs we start bfs from inputs to ensure topological sort + for (const auto & input : inputs) { + const auto data_ptr = input.second->getInputData(); + for (const auto & consumer : getInputTo(data_ptr)) + bfs(consumer.second, true); + } } currentLayer = nextLayersToVisit.front(); } diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index df634bf08e1491..8902e846ed182e 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -1311,8 +1311,11 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr res->params["normalize_variance"] = params.at("normalize_variance"); res->params["normalize_variance"] = res->getBoolStrParamAsIntStr("normalize_variance"); res->params["eps"] = params.at("eps"); - res->params["across_channels"] = params.at("across_channels"); - res->params["across_channels"] = res->getBoolStrParamAsIntStr("across_channels"); + const auto& acrossChannelsIt = params.find("across_channels"); + if (acrossChannelsIt != params.end()) { + res->params["across_channels"] = params.at("across_channels"); + res->params["across_channels"] = 
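
With the `Layout::HWC` handling added above (blocking order `{1, 2, 0}`), an interleaved channel-last image can be wrapped without reordering its memory. A minimal sketch, assuming a U8 image of 224x224 with 3 channels; the buffer and sizes are illustrative:

```cpp
#include <vector>
#include <inference_engine.hpp>

// Dims are still passed in CHW order {C, H, W}; Layout::HWC only changes the
// memory order to {1, 2, 0}, i.e. H, W, C (channel-last / interleaved).
std::vector<uint8_t> hwc_data(3 * 224 * 224);  // hypothetical interleaved image bytes
InferenceEngine::TensorDesc desc(InferenceEngine::Precision::U8, {3, 224, 224}, InferenceEngine::Layout::HWC);
auto blob = InferenceEngine::make_shared_blob<uint8_t>(desc, hwc_data.data(), hwc_data.size());
```
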
res->getBoolStrParamAsIntStr("across_channels"); + } return res; }); @@ -1555,8 +1558,8 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr return res; }); - addSpecificCreator({"TensorIterator"}, [](const std::shared_ptr<::ngraph::Node>& node, - const std::map& params) -> CNNLayerPtr { + addSpecificCreator({"TensorIterator", "StaticShapeLoop"}, + [](const std::shared_ptr<::ngraph::Node>& node, const std::map& params) -> CNNLayerPtr { auto res = createSubGraphLayer(node); res->type = "TensorIterator"; return res; @@ -1586,11 +1589,25 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr return res; }); - addSpecificCreator({"PSROIPooling"}, [](const std::shared_ptr<::ngraph::Node> &node, - const std::map ¶ms) -> CNNLayerPtr { - LayerParams attrs = {node->get_friendly_name(), "PSROIPooling", details::convertPrecision(node->get_output_element_type(0))}; - auto res = std::make_shared(attrs); - res->params = params; + addSpecificCreator({"VariadicSplit"}, [](const std::shared_ptr<::ngraph::Node>& node, + const std::map& params) -> CNNLayerPtr { + LayerParams attrs = {node->get_friendly_name(), "Split", details::convertPrecision(node->get_output_element_type(0))}; + auto res = std::make_shared(attrs); + auto castedLayer = std::dynamic_pointer_cast(node); + if (!castedLayer) THROW_IE_EXCEPTION << "Cannot get " << attrs.type << " layer " << attrs.name; + + auto axis_node = castedLayer->input_value(1).get_node_shared_ptr(); + const auto axis_node_const = ngraph::as_type_ptr(axis_node); + if (!axis_node_const) { + THROW_IE_EXCEPTION << "Split " << castedLayer->get_friendly_name() << " has no axes as Constant"; + } + + auto axis = axis_node_const->cast_vector()[0]; + if (axis < 0) { + axis += castedLayer->get_input_shape(0).size(); + } + + res->params["axis"] = Builder::asString(axis); return res; }); } @@ -1628,9 +1645,7 @@ void convertFunctionToICNNNetwork(const std::shared_ptr>(), std::make_shared>(), std::make_shared>(), - std::make_shared>(), std::make_shared>(), - std::make_shared>(), std::make_shared>(), std::make_shared>(), std::make_shared>(), @@ -1876,7 +1891,9 @@ void convertFunctionToICNNNetwork(const std::shared_ptroutData.clear(); continue; } + NGRAPH_SUPPRESS_DEPRECATED_START auto outName = layer->output(i).get_tensor().get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END if (outName.empty()) { outName = ngraph::op::util::create_ie_output_name(layer->output(i)); } @@ -1930,7 +1947,9 @@ void convertFunctionToICNNNetwork(const std::shared_ptr(layer)) { IE_ASSERT(layer->get_input_size() == 1); const auto &input = layer->input_value(0); + NGRAPH_SUPPRESS_DEPRECATED_START auto name = input.get_tensor().get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END if (!name.empty()) cnnNetworkImpl->addOutput(name); else diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp index aa0e7b93b24300..f8fe47306ba607 100644 --- a/inference-engine/src/legacy_api/src/graph_transformer.cpp +++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp @@ -219,7 +219,9 @@ static std::vector skipConstInfer = { "Convolution", // Const inference function for Convolution is not implemented "Eltwise", // Const inference function for Eltwise is not implemented "FullyConnected", - "Squeeze" + "Squeeze", + "TensorIterator", + "LSTMSequence", }; const std::map ConstTransformer::getConstLayers(const std::vector& sortedLayers) { diff --git 
a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp index 03a277a233c1c4..9b677b4b6a4ecc 100644 --- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp +++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp @@ -250,27 +250,6 @@ CNNLayer::Ptr NodeConverter::createLayer(const std::s return res; } -template <> -CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { - LayerParams params = {layer->get_friendly_name(), "Split", - details::convertPrecision(layer->get_output_element_type(0))}; - auto res = std::make_shared(params); - auto castedLayer = ngraph::as_type_ptr(layer); - if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; - - auto axis_node = castedLayer->input_value(1).get_node_shared_ptr(); - const auto axis_node_const = std::dynamic_pointer_cast(axis_node); - if (!axis_node_const) { - THROW_IE_EXCEPTION << "Split " << castedLayer->get_friendly_name() << " has no axes as Constant"; - } - auto axis = axis_node_const->cast_vector()[0]; - if (axis < 0) { - axis += castedLayer->get_input_shape(0).size(); - } - res->params["axis"] = asString(axis); - return res; -} - template <> CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { LayerParams params = {layer->get_friendly_name(), "Concat", @@ -599,24 +578,6 @@ CNNLayer::Ptr NodeConverter::createLa return res; } -template <> -CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { - LayerParams params = {layer->get_friendly_name(), "ReorgYolo", - details::convertPrecision(layer->get_output_element_type(0))}; - auto res = std::make_shared(params); - auto castedLayer = ngraph::as_type_ptr(layer); - if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; - - std::string value; - for (const auto& val : castedLayer->get_strides()) { - if (!value.empty()) value += ","; - value += asString(val); - } - - res->params["stride"] = value; - return res; -} - template <> CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { LayerParams params = {layer->get_friendly_name(), "Log", diff --git a/inference-engine/src/legacy_api/src/ngraph_ops/onehot_ie.cpp b/inference-engine/src/legacy_api/src/ngraph_ops/onehot_ie.cpp index 2c964ec21d8098..403b82fcd61ec5 100644 --- a/inference-engine/src/legacy_api/src/ngraph_ops/onehot_ie.cpp +++ b/inference-engine/src/legacy_api/src/ngraph_ops/onehot_ie.cpp @@ -19,10 +19,10 @@ op::OneHotIE::OneHotIE(const Output& input, int axis, int depth, f void op::OneHotIE::validate_and_infer_types() { const PartialShape& arg_shape = get_input_partial_shape(0); - if (arg_shape.is_dynamic()) { + if (arg_shape.rank().is_dynamic()) { set_output_type(0, m_type, PartialShape::dynamic()); } else { - Shape output_shape = arg_shape.to_shape(); + vector output_shape{arg_shape}; int normalized_axis = m_axis; if (m_axis < 0) normalized_axis = m_axis + static_cast(arg_shape.to_shape().size()); diff --git a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp index 0da3b68ba8fd9d..9aef6f8e78ce1f 100644 --- a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp +++ 
b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp @@ -46,7 +46,8 @@ ngraph::pass::ConvertOneHotToOneHotIEMatcher::ConvertOneHotToOneHotIEMatcher() { // insert Convert layer to cast output to a correct data type defined by the on/off values if (on_value_node->get_element_type() != m_output_type) { auto convert = std::make_shared(one_hot_ie, on_value_node->get_element_type()); - convert->set_friendly_name(one_hot->get_friendly_name() + "/Convert"); + convert->set_friendly_name(one_hot->get_friendly_name()); + one_hot->set_friendly_name(one_hot->get_friendly_name() + "/FloatOutput"); ngraph::copy_runtime_info(one_hot, {one_hot_ie, convert}); ngraph::replace_node(m.get_match_root(), convert); } else { diff --git a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/reshape_fully_connected.cpp b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/reshape_fully_connected.cpp index 502ee859bb4070..509f1bd620d060 100644 --- a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/reshape_fully_connected.cpp +++ b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/reshape_fully_connected.cpp @@ -61,7 +61,7 @@ ngraph::pass::ReshapeFullyConnected::ReshapeFullyConnected() { auto reshape_output = op::util::reshapeTo(fc_new, output_shape); new_ops.push_back(reshape_output); reshape_output->set_friendly_name(fc->get_friendly_name()); - fc->set_friendly_name(fc->get_friendly_name() + "/FC"); + fc_new->set_friendly_name(fc->get_friendly_name() + "/FC"); ngraph::copy_runtime_info(fc, new_ops); ngraph::replace_node(fc, reshape_output); } else { diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp index 2bfc6eeb9df626..33d9507836a7d9 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -20,21 +20,39 @@ class FakeQuantizeDequantization { FakeQuantizeDequantization(); FakeQuantizeDequantization( - Output data, - std::shared_ptr convert, - std::shared_ptr subtract, - std::shared_ptr multiply); + const Output& data, + const std::shared_ptr& convert, + const std::shared_ptr& subtract, + const std::shared_ptr& subtractConvert, + const std::shared_ptr& subtractConstant, + const std::shared_ptr& multiply, + const std::shared_ptr& multiplyConstant); bool empty() const; bool multiplyHasZero() const; bool isShared() const; bool isLowPrecision() const; + static bool checkElementwise(const std::shared_ptr& elementwise); + static bool checkShape(const std::shared_ptr& elementwise) noexcept; + + static int fillDequantizationParams( + const std::shared_ptr& elementwise, + std::shared_ptr& convert, + std::shared_ptr& constant) noexcept; + + static int fillDequantizationParams( + const std::shared_ptr& elementwise, + std::shared_ptr& constant) noexcept; + Output data; std::shared_ptr convert; std::shared_ptr subtract; + std::shared_ptr subtractConvert; + std::shared_ptr subtractConstant; std::shared_ptr multiply; + std::shared_ptr multiplyConstant; }; 
} // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/disable_convert_constant_folding_on_const_path.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/disable_convert_constant_folding_on_const_path.hpp new file mode 100644 index 00000000000000..9427894071edad --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/disable_convert_constant_folding_on_const_path.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API DisableConvertConstantFoldingOnConstPath; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::DisableConvertConstantFoldingOnConstPath : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + DisableConvertConstantFoldingOnConstPath( + const std::vector& inputPrecisions = {}); +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp index f9bf7a1082a795..e6780d99e9b78e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -21,6 +21,9 @@ class TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation static bool isBroadcasted(const Shape& shape) noexcept; protected: int getNotEmpty(const std::shared_ptr& eltwise) const; + // Return indexes: + // 1. first - data branch index for eltwise + // 2. 
second - Constant branch index for data branch Multiply std::pair getMultiplyConstBranch(const std::shared_ptr& eltwise) const; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp index 05574be1d15641..4a260aeebcaefa 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -16,16 +16,14 @@ namespace low_precision { class TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation { public: FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - ~FakeQuantizeTransformation() override {}; void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; static bool checkElementwise(const std::shared_ptr& eltwise); + private: - std::shared_ptr fuseElementwise( - TransformationContext& context, - const std::shared_ptr& fakeQuantize) const; + std::shared_ptr fuseElementwise(TransformationContext& context, const std::shared_ptr& fakeQuantize) const; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp new file mode 100644 index 00000000000000..15cc2c1e9858ff --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "layer_transformation.hpp" +#include "low_precision/fuse_fake_quantize.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation { +public: + FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) {} + void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp new file mode 100644 index 00000000000000..56fcb241e7416b --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "low_precision/layer_transformation.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation { +public: + 
FoldConvertTransformation(const Params& params) : LayerTransformation(params) {} + ~FoldConvertTransformation() override {} + void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp index eaeea836ddab1d..ac9b90c72524c2 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -51,6 +51,10 @@ class TRANSFORMATIONS_API DataPrecision { max(max), hasZeroPoint(hasZeroPoint) {} + static bool isSupported(const element::Type& precision) { + return (precision == element::u8) || (precision == element::i8); + } + static float getMinValue(const element::Type precision, const size_t levels) { if (precision == element::i8) { if (levels == 255) { @@ -305,8 +309,6 @@ class TRANSFORMATIONS_API LayerTransformation { ILayerTransformationsManager* layerTransformationsManager; protected: - std::shared_ptr separateInStandaloneBranch(std::shared_ptr node) const; - std::shared_ptr moveDequantizationAfter( TransformationContext &context, const std::shared_ptr& operation, @@ -314,8 +316,6 @@ class TRANSFORMATIONS_API LayerTransformation { const bool updatePrecision, const bool moveSubtract = true) const; - void fuseConvertIfPossible(const std::shared_ptr& operation) const; - void updateOutput( TransformationContext &context, std::shared_ptr lastNode, @@ -328,6 +328,9 @@ class TRANSFORMATIONS_API LayerTransformation { void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) const; + //TODO: replace with canBeTransformed when quantization by special dimension is supported for all transformations + bool canBeTransformedSpecialDimension(const TransformationContext& context, std::shared_ptr layer) const; + template void addSingleNodePattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const { using namespace ngraph; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 75e862de69d876..1d1e21747734a6 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -39,10 +39,10 @@ class TRANSFORMATIONS_API NetworkHelper { // Collect and return a vector with all nodes that consumes any of the `node` output static std::vector> consumers(std::shared_ptr node); - static Shape alignShapeForChannelDim(const Shape& shape, Rank rank); + // 
return true if op is on a constant path + static bool isConstantPath(const std::shared_ptr& op); - // return true if at least one child uses layer on weights - static bool onWeights(std::shared_ptr layer); + static Shape alignShapeForChannelDim(const Shape& shape, Rank rank); template static std::shared_ptr setOutDataPrecisionForTypeRelaxed(std::shared_ptr operation, const element::Type& precision); @@ -83,6 +83,8 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr round(std::shared_ptr node, element::Type target_type); + static std::shared_ptr composeFakeQuantize(const std::shared_ptr& fq); + static std::tuple, std::shared_ptr> decomposeFakeQuantize( std::shared_ptr fq, const element::Type precision, @@ -114,10 +116,21 @@ class TRANSFORMATIONS_API NetworkHelper { const bool hasZeroPoint, const bool updatePrecision); - static FakeQuantizeDequantization getDequantization(const std::shared_ptr node, const size_t parentIndex = 0ul, const bool inPlace = false); + static bool areQuantizeAndDequantizeSupportedForSubtract(const std::shared_ptr& node); + + static bool areQuantizeAndDequantizeSupportedForMultiply(const std::shared_ptr& node); + + static bool isQuantizeSupported(const std::shared_ptr& fakeQuantize); + + static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); + + static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node); static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization); + // 1. remove Convert if possible + // 2. optimize Constant if possible + // 3. remove Subtract if Constant on the second branch is zero static std::shared_ptr optimizeSubtract(std::shared_ptr add); class InsertDequantizationResult { @@ -136,11 +149,6 @@ class TRANSFORMATIONS_API NetworkHelper { const bool updatePrecision, const bool moveSubtract); - // TODO: rename: fuseConvertIfPossible - static void removeConvertIfPossible( - const std::shared_ptr& operation, - const FakeQuantizeDequantization& dequantization); - static bool checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr& constant); static size_t getChildInputIndex(const std::shared_ptr& parent, const std::shared_ptr& child); @@ -158,9 +166,11 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq); static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues); - // multi-precision constant folding - // handles only specific case: Constant -> [dequantization operations] -> [node] - static void foldDequantization(std::shared_ptr& node, const size_t branchIndex, const bool inPlace = false); + static FakeQuantizeDequantization foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace = false); + + static std::shared_ptr separateInStandaloneBranch(std::shared_ptr node); + + static std::shared_ptr fuseConvert(const std::shared_ptr& fakeQuantize); private: static std::shared_ptr foldFakeQuantize(const std::shared_ptr& fq, const bool roundValues, const bool roundValuesWasSet); diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp index 2114d9f7601a94..1e4b05fce2812b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp 
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -71,7 +71,6 @@ class TRANSFORMATIONS_API QuantizationDetails { const size_t outputChannelsCount; private: - QuantizationDetails &operator=(const QuantizationDetails & /*target*/) { return *this; } static void validate(std::shared_ptr constantLayer); static std::vector getBlobValue(std::shared_ptr constantLayer); }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp index fe8a43784bdf29..e14d61536bde1d 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -148,6 +148,22 @@ class TRANSFORMATIONS_API LowPrecisionTransformations { return *this; } + /** + * Add decomposition transformation. Transformation type and operation type are required. + * Operation type is used to find transformation by operation during precision definition. + */ + template + LowPrecisionTransformations& addDecomposition(const LayerTransformation::Params& params) { + const std::string typeName = getType(); + const auto it = decompositionTransformations.find(typeName); + if (it != decompositionTransformations.end()) { + decompositionTransformations.erase(it); + } + + decompositionTransformations.emplace(typeName, std::make_shared(params)); + return *this; + } + /** * Add transformation. Transformation type and operation type are required. * Operation type is used to find transformation by operation during precision definition. 
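Note (not part of the patch): the hunk above introduces a separate registry for decomposition transformations alongside the existing branch-specific/cleanup maps. A minimal usage sketch is below; it assumes the template parameters of addDecomposition are the transformation type and the matched operation type, following the pattern of the surrounding add* methods, and the exact registration call site is illustrative only.

    // Illustrative sketch, not taken verbatim from the patch.
    using namespace ngraph::pass::low_precision;

    LayerTransformation::Params params;            // default LPT parameters
    LowPrecisionTransformations transformations;   // assumed default-constructible here

    // Register the new FakeQuantize decomposition pass; the operation type is used
    // later to look the transformation up during precision definition.
    transformations.addDecomposition<FakeQuantizeDecompositionTransformation,
                                     ngraph::opset1::FakeQuantize>(params);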
@@ -233,6 +249,7 @@ class TRANSFORMATIONS_API LowPrecisionTransformations { // Key is not a layer type, but just a name of transformation // Layer type (or a pattern) is defined by transformation itself as an ngraph matcher std::map branchSpecificTransformations; + std::map decompositionTransformations; std::map transformations; std::map>> cleanupTransformations; std::vector standaloneCleanupTransformations; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp index 20ea6f6ad47753..388175be774110 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -17,11 +17,11 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma public: WeightableLayerTransformation(const Params& params); bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; - bool isQuantized(std::shared_ptr layer, bool isReshape) const noexcept; + bool isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; protected: - DataPrecision decomposeFakeQuantizeForWeightsPath(std::shared_ptr weightableLayer) const; + void decomposeFakeQuantizeForWeightsPath(std::shared_ptr weightableLayer) const; static bool isGroup(const std::shared_ptr& node); static bool isDepthwise(const std::shared_ptr& node); diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 8e9670e1fc42e1..a901ba1c039d5a 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -97,7 +97,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(op, 0)); NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(op, 1)); - std::shared_ptr addNode = separateInStandaloneBranch(op); + std::shared_ptr addNode = NetworkHelper::separateInStandaloneBranch(op); std::shared_ptr add = as_type_ptr(addNode); const int fullPathIndex = getNotEmpty(add); @@ -107,8 +107,10 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter if (fullPathIndex == -1) { // swap constant multiply and add and possibly fuse to subtract const auto multiplyBranch = getMultiplyConstBranch(add); - - if (multiplyBranch.first == -1) { + if (multiplyBranch.first != -1) { + NetworkHelper::foldDequantization(add, multiplyBranch.first == 0 ? 
1 : 0); + } else { + // constant folding on dequantization ops (for example: Convert on Subtract) NetworkHelper::foldDequantization(addNode, 0); NetworkHelper::foldDequantization(addNode, 1); return false; @@ -140,15 +142,17 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter return false; } - std::shared_ptr subtractEmptyPathValues; - std::shared_ptr multiplyEmptyPathValues; - std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); - FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::getDequantization(add, fullPathIndex); if (updatePrecisions && !dequantizationFullPath.empty() && !dequantizationFullPath.isLowPrecision()) { return false; } + dequantizationEmptyPath = NetworkHelper::foldDequantization(addNode, emptyPathIndex); + std::shared_ptr subtractEmptyPathValues; + std::shared_ptr multiplyEmptyPathValues; + std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); + + dequantizationFullPath = NetworkHelper::foldDequantization(addNode, fullPathIndex); std::shared_ptr subtractFullPathValues; std::shared_ptr multiplyFullPathValues; std::tie(subtractFullPathValues, multiplyFullPathValues) = NetworkHelper::createEmptyValues(dequantizationFullPath); diff --git a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp index 04943099a92047..2eb260ceba5833 100644 --- a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -29,7 +29,7 @@ bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pa return false; } - const std::shared_ptr pooling = separateInStandaloneBranch(m.get_match_root()); + const std::shared_ptr pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(pooling); diff --git a/inference-engine/src/low_precision_transformations/src/clamp.cpp b/inference-engine/src/low_precision_transformations/src/clamp.cpp index c93eec47c0c563..7f844eb124df2d 100644 --- a/inference-engine/src/low_precision_transformations/src/clamp.cpp +++ b/inference-engine/src/low_precision_transformations/src/clamp.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -25,7 +25,15 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt if (sub == nullptr) { return false; } - const auto constant = as_type_ptr(sub->get_input_node_shared_ptr(1)); + + auto constant = as_type_ptr(sub->get_input_node_shared_ptr(1)); + if (constant == nullptr) { + const auto convert = sub->get_input_node_shared_ptr(1); + if (!is_type(convert)) { + return false; + } + constant = as_type_ptr(convert->get_input_node_shared_ptr(0)); + } if (constant == nullptr) { return false; @@ -38,7 +46,7 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt return false; } - const std::shared_ptr clamp = separateInStandaloneBranch(m.get_match_root()); + std::shared_ptr clamp = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); const FakeQuantizeDequantization dequantization = 
NetworkHelper::getDequantization(clamp); const bool moveSubtract = subWithTheSameValues(dequantization.subtract); @@ -46,26 +54,31 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt if (!moveSubtract && (dequantization.subtract != nullptr)) { return false; } + const auto newClamp = as_type_ptr(moveDequantizationAfter(context, clamp, dequantization, false, moveSubtract)); - double min = newClamp->get_min(); - double max = newClamp->get_max(); - if (dequantization.multiply != nullptr) { - double scale = as_type_ptr(dequantization.multiply->get_input_node_shared_ptr(1))->cast_vector()[0]; - if (scale < 0.0) { - std::swap(min, max); + std::shared_ptr replacement; + { + double min = newClamp->get_min(); + double max = newClamp->get_max(); + + if (dequantization.multiply != nullptr) { + double scale = as_type_ptr(dequantization.multiply->get_input_node_shared_ptr(1))->cast_vector()[0]; + if (scale < 0.0) { + std::swap(min, max); + } + min /= scale; + max /= scale; } - min /= scale; - max /= scale; - } - if (dequantization.subtract != nullptr && moveSubtract) { - double shift = as_type_ptr(dequantization.subtract->get_input_node_shared_ptr(1))->cast_vector()[0]; - min += shift; - max += shift; - } + if (dequantization.subtract != nullptr && moveSubtract) { + double shift = as_type_ptr(dequantization.subtractConstant)->cast_vector()[0]; + min += shift; + max += shift; + } - const std::shared_ptr replacement = std::make_shared(newClamp->get_input_node_shared_ptr(0), min, max); + replacement = std::make_shared(newClamp->get_input_node_shared_ptr(0), min, max); + } replace_node(newClamp, replacement); element::Type outputClampType = dequantization.multiply ? diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 6da7208aa04883..575ec26a91ad93 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -52,23 +52,22 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat } std::unordered_map dequantizations; - std::vector quantizationLayersDetails; - for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - const std::shared_ptr fakeQuantizeLayer = subgraph.quantizationLayers[i]; - - const ngraph::Shape shape = fakeQuantizeLayer->get_output_shape(0); - if (shape.size() < 4ul) { + const std::shared_ptr fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); + if (fq == nullptr) { return false; } - const std::shared_ptr fq = ngraph::as_type_ptr(fakeQuantizeLayer->shared_from_this()); - if (fq == nullptr) { + if (!NetworkHelper::isQuantizeSupported(fq)) { return false; } const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(fq); - quantizationLayersDetails.push_back(quantizationDetails); + + // per tensor scale is supported only + if (quantizationDetails.inputHighValues.size() != 1ul) { + return false; + } const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); if (dataPrecision2.precision == ngraph::element::undefined) { @@ -86,9 +85,27 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat return false; } - // per tensor scale is supported only - if (quantizationLayersDetails.empty() || (quantizationLayersDetails[0].inputHighValues.size() != 1ul)) { - return false; + std::vector quantizationLayersDetails; + 
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { + std::shared_ptr fakeQuantize = as_type_ptr(subgraph.quantizationLayers[i]); + auto newFakeQuantize = NetworkHelper::fuseConvert(fakeQuantize); + if (newFakeQuantize == nullptr) { + subgraph.quantizationLayers[i] = fakeQuantize; + quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); + continue; + } + + fakeQuantize = newFakeQuantize; + newFakeQuantize = NetworkHelper::composeFakeQuantize(fakeQuantize); + if (newFakeQuantize == nullptr) { + subgraph.quantizationLayers[i] = fakeQuantize; + quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); + continue; + } + + fakeQuantize = newFakeQuantize; + subgraph.quantizationLayers[i] = fakeQuantize; + quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); } FakeQuantizeDequantization dequantization; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index 5eee9e2b2907de..4fff73d65899fd 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -76,18 +76,33 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context } } + for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { + const std::shared_ptr fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); + if (fq == nullptr) { + return false; + } + + if (!NetworkHelper::isQuantizeSupported(fq)) { + return false; + } + } + std::unordered_map dequantizations; for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { const std::shared_ptr& fakeQuantizeLayer = subgraph.quantizationLayers[i]; - const ngraph::Shape shape = fakeQuantizeLayer->get_output_shape(0); - if (shape.size() < 4ul) { - return false; + + std::shared_ptr fq = ngraph::as_type_ptr(fakeQuantizeLayer->shared_from_this()); + assert(fq); + + auto newFakeQuantize = NetworkHelper::fuseConvert(fq); + if (newFakeQuantize != nullptr) { + fq = newFakeQuantize; } - const std::shared_ptr fq = ngraph::as_type_ptr(fakeQuantizeLayer->shared_from_this()); - if (fq == nullptr) { - return false; + newFakeQuantize = NetworkHelper::composeFakeQuantize(fq); + if (newFakeQuantize != nullptr) { + fq = newFakeQuantize; } const DataPrecision currentDataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); @@ -201,8 +216,7 @@ void ConcatMultiChannelsTransformation::updateDequantizationShapesIfNecessary( FakeQuantizeDequantization replacedDequantization = dequantizationByFakeQuantize[fakeQuantizes[i]->get_friendly_name()]; const float scale = as_type_ptr(replacedDequantization.multiply->get_input_node_shared_ptr(1))->cast_vector()[0]; - const float shift = replacedDequantization.subtract ? - as_type_ptr(replacedDequantization.subtract->get_input_node_shared_ptr(1))->cast_vector()[0] : 0.f; + const float shift = replacedDequantization.subtract ? 
replacedDequantization.subtractConstant->cast_vector()[0] : 0.f; const auto precisionBefore = replacedDequantization.data.get_element_type(); const auto precisionAfter = replacedDequantization.multiply->get_element_type(); diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp index 734dd176c5def4..705610f97ae93f 100644 --- a/inference-engine/src/low_precision_transformations/src/convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -24,7 +24,12 @@ void ConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, Transforma addPattern( pass, context, - make_op_pattern({ make_op_label(), make_op_label()})); + make_op_pattern({ make_op_label(), make_op_label() })); + + addPattern( + pass, + context, + make_op_pattern({ make_op_label(), make_op_label() })); } bool ConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { @@ -51,7 +56,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph return false; } - convolution = separateInStandaloneBranch(convolution); + convolution = NetworkHelper::separateInStandaloneBranch(convolution); dequantization = NetworkHelper::getDequantization(convolution); { @@ -156,6 +161,18 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph decomposeFakeQuantizeForWeightsPath(convolution); std::shared_ptr reshapeFromWeights = as_type_ptr(convolution->input_value(1).get_node_shared_ptr()); + + const auto dequantization = reshapeFromWeights == nullptr ? + NetworkHelper::getDequantization(convolution, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + assert(!dequantization.empty()); + if (is_type(dequantization.data.get_node())) { + const std::shared_ptr fq = as_type_ptr(dequantization.data.get_node_shared_ptr()); + std::shared_ptr newFQ = NetworkHelper::fold_fake_quantize(fq, true); + NetworkHelper::copyInfo(fq, newFQ); + replace_node(fq, newFQ); + } + std::shared_ptr multiplyFromWeights = as_type_ptr( reshapeFromWeights == nullptr ? convolution->input_value(1).get_node_shared_ptr() : @@ -164,8 +181,10 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph { Shape newScaleShape = multiplyFromWeights->get_input_shape(1); - // that's all we need: [C, 1, 1, 1] => [C, 1, 1] - newScaleShape.pop_back(); + if (!newScaleShape.empty()) { + // that's all we need: [C, 1, 1, 1] => [C, 1, 1] + newScaleShape.pop_back(); + } if (reshapeFromWeights != nullptr) { reshapeFromWeights = as_type_ptr(reshapeFromWeights->copy_with_new_inputs({ @@ -189,9 +208,13 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph } if (subtractFromWeights != nullptr) { + // optimize zero point on weights auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights); + // TODO: handle optimizedSubtract == nullptr; - if (optimizedSubtract != nullptr) { + if (optimizedSubtract == nullptr) { + subtractFromWeights = nullptr; + } else { subtractFromWeights = as_type_ptr(optimizedSubtract); const Shape weightsShape = subtractFromWeights->input(0).get_shape(); @@ -208,8 +231,8 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph std::shared_ptr convertFromWeights = as_type_ptr(subtractFromWeights == nullptr ? 
multiplyFromWeights->get_input_node_shared_ptr(0) : subtractFromWeights->get_input_node_shared_ptr(0)); - if (convertFromWeights != nullptr) { + // remove Convert on weights std::shared_ptr childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights; auto newConvolution = convolution->clone_with_new_inputs({ @@ -223,6 +246,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph reshapeFromWeights = as_type_ptr(convolution->get_input_node_shared_ptr(1)); if (reshapeFromWeights != nullptr) { + // remove Reshape on weights const std::shared_ptr newWeights = fold_reshape( reshapeFromWeights->input_value(0), reshapeFromWeights->input_value(1), diff --git a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp index 99bb398da45126..98c85d6c39cc3f 100644 --- a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp +++ b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -23,10 +23,12 @@ void DepthToSpaceTransformation::registerMatcherIn(GraphRewrite& pass, Transform } bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { - const std::shared_ptr depthToSpace = separateInStandaloneBranch(m.get_match_root()); + std::shared_ptr depthToSpace = m.get_match_root(); if (!canBeTransformed(context, depthToSpace)) { return false; } + + depthToSpace = NetworkHelper::separateInStandaloneBranch(depthToSpace); moveDequantizationAfter(context, depthToSpace, NetworkHelper::getDequantization(depthToSpace), true); return true; } @@ -49,8 +51,7 @@ bool DepthToSpaceTransformation::canBeTransformed(const TransformationContext& c } if (dequantization.subtract != nullptr) { - auto subtractConst = as_type_ptr(dequantization.subtract->get_input_node_shared_ptr(1)); - if (!NetworkHelper::isScalarLike(subtractConst)) { + if (!NetworkHelper::isScalarLike(dequantization.subtractConstant)) { return false; } } diff --git a/inference-engine/src/low_precision_transformations/src/disable_convert_constant_folding_on_const_path.cpp b/inference-engine/src/low_precision_transformations/src/disable_convert_constant_folding_on_const_path.cpp new file mode 100644 index 00000000000000..aecd2dfdd939e2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/disable_convert_constant_folding_on_const_path.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/disable_convert_constant_folding_on_const_path.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include "transformations/rt_info/dequantization_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableConvertConstantFoldingOnConstPath, "DisableConvertConstantFoldingOnConstPath", 0); + +ngraph::pass::DisableConvertConstantFoldingOnConstPath::DisableConvertConstantFoldingOnConstPath( + const std::vector& inputPrecisions) { + auto matcherData = ngraph::pattern::any_input(); + auto matcherConvert = ngraph::pattern::wrap_type({ matcherData }, pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool { + const auto& opsMap = m.get_pattern_value_map(); + const auto convert = 
opsMap.find(matcherConvert)->second.get_node()->shared_from_this(); + + // validation by Convert operation input precisions + if (!inputPrecisions.empty()) { + const ngraph::element::Type inputPrecision = convert->input(0).get_element_type(); + if (std::find(inputPrecisions.begin(), inputPrecisions.end(), inputPrecision) == inputPrecisions.end()) { + return false; + } + } + + // Constant subgraph has not be folded if Convert is part of dequantization operations: + // + // Constant Constant + // | | + // Convert Constant OR Convert Constant + // \ / \ / + // Subtract Constant Multiply + // \ / + // Multiply + // + auto parent = convert->get_input_node_ptr(0); + auto child = convert->output(0).get_target_inputs().begin()->get_node(); + if (is_type(parent) && + (is_type(child) || is_type(child))) { + auto& rtInfo = convert->get_rt_info(); + rtInfo["DISABLED_CONSTANT_FOLDING"] = std::make_shared>(""); + return true; + } + + return false; + }; + + auto m = std::make_shared(matcherConvert, "DisableConvertConstantFoldingOnConstPath"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp index 155e4badf99434..80eb83f8ea949a 100644 --- a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "low_precision/eltwise_base_transformation.hpp" @@ -83,13 +83,13 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co } int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) const { - FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 0ul); - if (dequantization1.empty()) { + const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 0ul); + if (dequantization1.empty() || as_type(dequantization1.data.get_node())) { return -1; } - FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 1ul); - if (dequantization2.empty()) { + const FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 1ul); + if (dequantization2.empty() || as_type(dequantization2.data.get_node())) { return -1; } @@ -141,16 +141,22 @@ int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) } std::pair EltwiseBaseTransformation::getMultiplyConstBranch(const std::shared_ptr& eltwise) const { - std::shared_ptr parent1 = eltwise->get_input_node_shared_ptr(0); - std::shared_ptr parent2 = eltwise->get_input_node_shared_ptr(1); - - std::shared_ptr constParent = as_type_ptr(parent1); + const std::shared_ptr parent1 = eltwise->get_input_node_shared_ptr(0); + const auto dequantization1 = NetworkHelper::getDequantization(eltwise, 0); + const std::shared_ptr parent2 = eltwise->get_input_node_shared_ptr(1); + const auto dequantization2 = NetworkHelper::getDequantization(eltwise, 1); + + std::shared_ptr constParent = dequantization1.empty() ? 
+ as_type_ptr(parent1) : + as_type_ptr(dequantization1.data.get_node_shared_ptr()); std::shared_ptr multiplyParent = as_type_ptr(parent2); int multiplyBranch = 1; if (constParent == nullptr || multiplyParent == nullptr) { - constParent = as_type_ptr(parent2); + constParent = dequantization2.empty() ? + as_type_ptr(parent2) : + as_type_ptr(dequantization2.data.get_node_shared_ptr()); multiplyParent = as_type_ptr(parent1); multiplyBranch = 0; } diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp index e872a2c83bead9..7169d5ca32d7dc 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp @@ -1,21 +1,12 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "low_precision/fake_quantize.hpp" -#include -#include -#include -#include #include -#include -#include -#include - #include -#include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" namespace ngraph { @@ -28,94 +19,16 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); + if (!NetworkHelper::isQuantizeSupported(layer)) { + return false; + } std::shared_ptr fakeQuantize = layer; - do { layer = fakeQuantize; fakeQuantize = fuseElementwise(context, fakeQuantize); } while (fakeQuantize != nullptr); - const ngraph::element::Type precision = layer->get_output_element_type(0); - if ((precision == ngraph::element::i8) || (precision == ngraph::element::u8)) { - return false; - } - - // FakeQuantize on weights are used without dequantization ScaleShifts - if (NetworkHelper::onWeights(layer)) { - return false; - } - - if (as_type(layer->get_input_node_ptr(0))) { - bool nextOpearionsWillBeNotHandled = true; - for (auto output : layer->outputs()) { - for (auto input : output.get_target_inputs()) { - auto activations = paramsManager->getPrecisionsOnActivations(*input.get_node()); - if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { - nextOpearionsWillBeNotHandled = false; - break; - } - } - - if (!nextOpearionsWillBeNotHandled) { - break; - } - } - - if (nextOpearionsWillBeNotHandled) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); - if (as_type_ptr(resultConstant)) { - replace_node(layer, resultConstant); - return true; - } - } - } - - if (!QuantizationDetails::outputLayoutIsSupported(layer)) { - return false; - } - - if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { - return false; - } - - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); - const DataPrecision dataPrecision = getDataPrecision(layer, quantizationDetails, false); - if (dataPrecision.precision == element::undefined) { - return false; - } - - // Split FakeQuantize to two parts: Quantize and Dequantize - auto QDQ = NetworkHelper::decomposeFakeQuantize( - as_type_ptr(layer), - dataPrecision.precision, - dataPrecision.min, - dataPrecision.max, - dataPrecision.hasZeroPoint, - updatePrecisions); - -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - { - const std::shared_ptr multiply = as_type_ptr(std::get<1>(QDQ)); - const std::shared_ptr 
multiplyConst = as_type_ptr(multiply->get_input_node_shared_ptr(1)); - const std::vector dequantizationScales = multiplyConst->cast_vector(); - - const std::shared_ptr subtract = as_type_ptr(multiply->get_input_node_shared_ptr(0)); - std::vector dequantizationShifts; - if (subtract != nullptr) { - const std::shared_ptr subtractConst = as_type_ptr(subtract->get_input_node_shared_ptr(1)); - dequantizationShifts = subtractConst->cast_vector(); - } else { - dequantizationShifts = std::vector(dequantizationScales.size()); - } - - printDequantizationValues(dequantizationScales, dequantizationShifts); - } -#endif - - std::shared_ptr dequantize = std::get<1>(QDQ); - updateOutput(context, dequantize, layer); - return true; } diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp new file mode 100644 index 00000000000000..d295258a2c898d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp @@ -0,0 +1,169 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/fake_quantize_decomposition.hpp" + +#include +#include + +#include "low_precision/common/ie_lpt_exception.hpp" +#include "low_precision/network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +void FakeQuantizeDecompositionTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { + addSingleNodePattern(pass, context); +} + +bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { + std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); + if (!NetworkHelper::isQuantizeSupported(layer)) { + return false; + } + + layer = NetworkHelper::fuseConvert(layer); + if (NetworkHelper::isConstantPath(layer)) { + // fold fq if constant just before fq and child layers aren't supported in LPT + if (as_type(layer->get_input_node_ptr(0))) { + bool nextOpearionsWillBeNotHandled = true; + for (auto output : layer->outputs()) { + for (auto input : output.get_target_inputs()) { + const auto node = input.get_node(); + + if (as_type(node)) { + for (const auto& child : NetworkHelper::consumers(node->shared_from_this())) { + if ((as_type_ptr(child)) && + (paramsManager->getPrecisionsOnActivations(*child).size() != 0ul)) { + nextOpearionsWillBeNotHandled = false; + break; + } + } + } + + if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { + nextOpearionsWillBeNotHandled = false; + break; + } + } + + if (!nextOpearionsWillBeNotHandled) { + break; + } + } + + if (nextOpearionsWillBeNotHandled) { + const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); + if (as_type_ptr(resultConstant)) { + replace_node(layer, resultConstant); + return true; + } + } + } + return false; + } + + const ngraph::element::Type precision = layer->get_output_element_type(0); + if (DataPrecision::isSupported(precision)) { + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantizationBelow(layer); + if (dequantization.empty()) { + return false; + } + + const DataPrecision expectedDataPrecision = getDataPrecision(dequantization.multiply, quantizationDetails, false); + if (expectedDataPrecision.precision == element::undefined) { + return false; + 
} + + if (expectedDataPrecision.precision == precision) { + return false; + } + + layer = NetworkHelper::composeFakeQuantize(layer); + if (layer == nullptr) { + return false; + } + } + + if (as_type(layer->get_input_node_ptr(0))) { + bool nextOpearionsWillBeNotHandled = true; + for (auto output : layer->outputs()) { + for (auto input : output.get_target_inputs()) { + auto activations = paramsManager->getPrecisionsOnActivations(*input.get_node()); + if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { + nextOpearionsWillBeNotHandled = false; + break; + } + } + + if (!nextOpearionsWillBeNotHandled) { + break; + } + } + + if (nextOpearionsWillBeNotHandled) { + const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); + if (as_type_ptr(resultConstant)) { + replace_node(layer, resultConstant); + return true; + } + } + } + + if (!QuantizationDetails::outputLayoutIsSupported(layer)) { + return false; + } + + if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { + return false; + } + + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + const DataPrecision dataPrecision = getDataPrecision(layer, quantizationDetails, false); + if (dataPrecision.precision == element::undefined) { + return false; + } + + // Split FakeQuantize to two parts: Quantize and Dequantize + auto QDQ = NetworkHelper::decomposeFakeQuantize( + as_type_ptr(layer), + dataPrecision.precision, + dataPrecision.min, + dataPrecision.max, + dataPrecision.hasZeroPoint, + updatePrecisions); + +#ifdef LPT_PRINT_DEQUANTIZATION_INFO + { + const std::shared_ptr multiply = as_type_ptr(std::get<1>(QDQ)); + const std::shared_ptr multiplyConst = as_type_ptr(multiply->get_input_node_shared_ptr(1)); + const std::vector dequantizationScales = multiplyConst->cast_vector(); + + const std::shared_ptr subtract = as_type_ptr(multiply->get_input_node_shared_ptr(0)); + std::vector dequantizationShifts; + if (subtract != nullptr) { + const std::shared_ptr subtractConst = as_type_ptr(subtract->get_input_node_shared_ptr(1)); + dequantizationShifts = subtractConst->cast_vector(); + } else { + dequantizationShifts = std::vector(dequantizationScales.size()); + } + + printDequantizationValues(dequantizationScales, dequantizationShifts); + } +#endif + + std::shared_ptr dequantize = std::get<1>(QDQ); + updateOutput(context, dequantize, layer); + + return true; +} + +bool FakeQuantizeDecompositionTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { + return false; +} +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp index 9244cd1159a00e..a3a88e511b8493 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -16,14 +16,20 @@ namespace low_precision { FakeQuantizeDequantization::FakeQuantizeDequantization() {} FakeQuantizeDequantization::FakeQuantizeDequantization( - Output data, - std::shared_ptr convert, - std::shared_ptr subtract, - std::shared_ptr multiply) : + const Output& data, + const std::shared_ptr& convert, + const std::shared_ptr& 
subtract, + const std::shared_ptr& subtractConvert, + const std::shared_ptr& subtractConstant, + const std::shared_ptr& multiply, + const std::shared_ptr& multiplyConstant) : data(data), convert(convert), subtract(subtract), - multiply(multiply) { + subtractConvert(subtractConvert), + subtractConstant(subtractConstant), + multiply(multiply), + multiplyConstant(multiplyConstant) { } bool FakeQuantizeDequantization::empty() const { @@ -67,19 +73,33 @@ bool FakeQuantizeDequantization::isLowPrecision() const { return (data.get_element_type() == element::i8) || (data.get_element_type() == element::u8); } +bool FakeQuantizeDequantization::checkShape(const std::shared_ptr& elementwise) noexcept { + std::shared_ptr convert; + std::shared_ptr constant; + const int branchIndex = FakeQuantizeDequantization::fillDequantizationParams(elementwise, convert, constant); + if (branchIndex == -1) { + return true; + } + + if (elementwise->output(0).get_shape() != elementwise->get_input_shape(branchIndex == 1 ? 0 : 1)) { + return false; + } + + return true; +} + bool FakeQuantizeDequantization::checkElementwise(const std::shared_ptr& dequantizationElementwise) { const ngraph::PartialShape partialShape = dequantizationElementwise->get_input_partial_shape(0); if (partialShape.is_dynamic()) { return false; } - std::shared_ptr constant = as_type_ptr(dequantizationElementwise->get_input_node_shared_ptr(1)); - if (constant == nullptr) { - constant = as_type_ptr(dequantizationElementwise->get_input_node_shared_ptr(0)); - } + std::shared_ptr convert; + std::shared_ptr constant; + FakeQuantizeDequantization::fillDequantizationParams(dequantizationElementwise, convert, constant); + if (constant == nullptr) { - THROW_IE_LPT_EXCEPTION(*dequantizationElementwise) << "unexpected operation type " << - dequantizationElementwise->get_type_info().name << " on the second branch"; + return false; } const ngraph::Shape constShape = constant->get_output_shape(0); @@ -117,6 +137,52 @@ bool FakeQuantizeDequantization::checkElementwise(const std::shared_ptr& elementwise, + std::shared_ptr& convert, + std::shared_ptr& constant) noexcept { + auto fill = []( + const std::shared_ptr& elementwise, + const size_t branchIndex, + std::shared_ptr& convert, + std::shared_ptr& constant) { + convert = as_type_ptr(elementwise->get_input_node_shared_ptr(branchIndex)); + if (convert != nullptr) { + constant = as_type_ptr(convert->get_input_node_shared_ptr(0)); + } else { + constant = as_type_ptr(elementwise->get_input_node_shared_ptr(branchIndex)); + } + }; + + fill(elementwise, 1ul, convert, constant); + if (constant != nullptr) { + return 1; + } + + fill(elementwise, 0ul, convert, constant); + if (constant != nullptr) { + return 0; + } + + return -1; +} + +int FakeQuantizeDequantization::fillDequantizationParams( + const std::shared_ptr& elementwise, + std::shared_ptr& constant) noexcept { + constant = as_type_ptr(elementwise->get_input_node_shared_ptr(1ul)); + if (constant != nullptr) { + return 1; + } + + constant = as_type_ptr(elementwise->get_input_node_shared_ptr(0ul)); + if (constant != nullptr) { + return 0; + } + + return -1; +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/fold_convert.cpp b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp new file mode 100644 index 00000000000000..16b0c93ac35435 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 
2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/fold_convert.hpp" +#include +#include +#include "low_precision/fake_quantize.hpp" +#include "low_precision/network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +void FoldConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { + addSingleNodePattern(pass, context); +} + +bool FoldConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { + const auto subtract = m.get_match_root(); + if (!canBeTransformed(context, subtract)) { + return false; + } + + const auto convert = subtract->get_input_node_shared_ptr(1); + const auto resultConstant = fold(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type()); + + replace_node(convert, resultConstant); + updateOutput(context, resultConstant, convert); + return true; +} + +bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { + return + is_type(operation->get_input_node_ptr(1)) && + is_type(operation->get_input_node_ptr(1)->get_input_node_ptr(0)); +} + +bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { + return false; +} + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/interpolate.cpp b/inference-engine/src/low_precision_transformations/src/interpolate.cpp index 48c00f43716a17..a1a460e213d046 100644 --- a/inference-engine/src/low_precision_transformations/src/interpolate.cpp +++ b/inference-engine/src/low_precision_transformations/src/interpolate.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -37,7 +37,7 @@ bool InterpolateTransformation::transform(TransformationContext &context, ngraph if (!canBeTransformed(context, m.get_match_root())) { return false; } - interpolate = separateInStandaloneBranch(interpolate); + interpolate = NetworkHelper::separateInStandaloneBranch(interpolate); moveDequantizationAfter(context, interpolate, NetworkHelper::getDequantization(interpolate), true); return true; } diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp index b2d904746f57d9..a7928488c0cbbb 100644 --- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -113,6 +113,20 @@ bool LayerTransformation::canBeTransformed(const TransformationContext& context, return true; } +bool LayerTransformation::canBeTransformedSpecialDimension(const TransformationContext& context, std::shared_ptr layer) const { + if (!isQuantized(layer)) { + return false; + } + + for (const auto& output : layer->outputs()) { + const size_t size = output.get_shape().size(); + if ((size < 2ul) || (size > 5ul)) { + return false; + } + } + return true; +} + bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, const size_t parentIndex) const { return canSubtractBeHandled(op, NetworkHelper::getDequantization(op, parentIndex)); } @@ -138,7 +152,17 @@ bool 
LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, return false; } - return true; + const auto parent = dequantization.subtract->input_value(1).get_node_shared_ptr(); + + if (is_type(parent)) { + return true; + } else if (is_type(parent) && is_type(parent->get_input_node_shared_ptr(0))) { + const auto constant = parent->get_input_node_shared_ptr(0); + const auto constantType = constant->output(0).get_element_type(); + return operationType == constantType; + } else { + return false; + } } #ifdef LPT_PRINT_DEQUANTIZATION_INFO @@ -156,7 +180,7 @@ std::stringstream toStream(const std::vector& dequantizationValues) { void LayerTransformation::printDequantizationInfo(const std::shared_ptr& layer) { const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(as_type_ptr(layer)); std::cout << - layer->get_type_name() << (NetworkHelper::onWeights(layer) ? " on weights " : " on activations ") << + layer->get_type_name() << (NetworkHelper::isConstantPath(layer) ? " on weights " : " on activations ") << layer->get_friendly_name() << ":" << std::endl << " details : " << quantizationDetails << std::endl; } @@ -378,44 +402,6 @@ std::vector> LayerTransformation::getChildrenRecursivelyEx return resultChildren; } - -std::shared_ptr LayerTransformation::separateInStandaloneBranch(std::shared_ptr node) const { - FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(node); - if (dequantization.isShared()) { - Output parent = dequantization.data; - if (dequantization.convert != nullptr) { - parent = dequantization.convert->clone_with_new_inputs({ parent }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); - } - - if (dequantization.subtract != nullptr) { - parent = dequantization.subtract->clone_with_new_inputs({ - parent, - dequantization.subtract->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); - } - - if (dequantization.multiply != nullptr) { - parent = dequantization.multiply->clone_with_new_inputs({ - parent, - dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); - } - - std::vector> inputs = NetworkHelper::getInputs(node); - const size_t inputIndex = NetworkHelper::getChildInputIndex(dequantization.multiply, node); - inputs[inputIndex] = parent; - const std::shared_ptr newNode = node->clone_with_new_inputs(inputs); - - replace_node(node, newNode); - newNode->set_friendly_name(node->get_friendly_name()); - - return newNode; - } - - return node; -} - std::shared_ptr LayerTransformation::moveDequantizationAfter( TransformationContext &context, const std::shared_ptr& operation, @@ -427,21 +413,6 @@ std::shared_ptr LayerTransformation::moveDequantizationAfter( return result.newOperation; } -void LayerTransformation::fuseConvertIfPossible(const std::shared_ptr& operation) const { - FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(operation, 0); - if ((dequantization.subtract != nullptr) && - NetworkHelper::checkConstantValuePrecision( - dequantization.convert->get_output_element_type(0), - dequantization.subtract->get_input_node_shared_ptr(1))) { - auto newOperation = separateInStandaloneBranch(operation); - dequantization = NetworkHelper::getDequantization(operation, 0); - // TODO: It is correct to use optimizeSubtract here: 
uncomment following rows and fix it - //auto newSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract); - //replace_node(dequantization.subtract, newSubtract); - NetworkHelper::removeConvertIfPossible(operation, dequantization); - } -} - void LayerTransformation::updateOutput( TransformationContext &context, std::shared_ptr lastNode, diff --git a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp index e7ccf91639ad91..212a8e8d11ab14 100644 --- a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp +++ b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,7 +22,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat return false; } - matMul = as_type_ptr(separateInStandaloneBranch(matMul)); + matMul = as_type_ptr(NetworkHelper::separateInStandaloneBranch(matMul)); FakeQuantizeDequantization dequantization2 = ngraph::pass::low_precision::NetworkHelper::getDequantization(matMul, 1); if (dequantization2.empty()) { @@ -45,27 +45,50 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat } const FakeQuantizeDequantization dequantization1 = ngraph::pass::low_precision::NetworkHelper::getDequantization(matMul, 0); - std::shared_ptr subtract; - if (dequantization1.subtract != nullptr) { - std::shared_ptr layer = dequantization1.subtract; - ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer); - - auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization1.subtract); - if (optimizedSubtract == nullptr) { - optimizedSubtract = dequantization1.subtract; - } - subtract = as_type_ptr(optimizedSubtract); + + if (dequantization2.subtract != nullptr) { + NetworkHelper::optimizeSubtract(dequantization2.subtract); + dequantization2 = ngraph::pass::low_precision::NetworkHelper::getDequantization(matMul, 1); } const std::shared_ptr newMatMul = std::make_shared>( std::vector({ element::f32, element::f32 }), std::vector({}), - ngraph::op::TemporaryReplaceOutputType(dequantization1.subtract != nullptr ? subtract : dequantization1.data, element::f32).get(), - ngraph::op::TemporaryReplaceOutputType(dequantization2.subtract != nullptr ? dequantization2.subtract : dequantization2.data, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(dequantization1.data, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(dequantization2.data, element::f32).get(), matMul->get_transpose_a(), matMul->get_transpose_b()); NetworkHelper::setOutDataPrecisionForTypeRelaxed(newMatMul, matMul->get_output_element_type(0)); NetworkHelper::copyInfo(matMul, newMatMul); + std::shared_ptr parent = newMatMul; + + // dequantization with subtract on activations & constant weights + if (dequantization1.subtract) { + auto broadcastShape = NetworkHelper::isScalarLike(as_type_ptr(dequantization1.subtract->get_input_node_shared_ptr(1))) ? + ngraph::Shape(dequantization1.subtract->get_shape().size(), 1) : + dequantization1.subtract->get_input_node_shared_ptr(1)->get_shape(); + const size_t lastIdx = matMul->get_transpose_a() ? 
broadcastShape.size() - 2 : broadcastShape.size() - 1; + broadcastShape[lastIdx] = dequantization1.subtract->get_shape()[lastIdx]; + + // broadcasted sub const to form [1, ..., 1, Y] + const auto broadcastedConst = fold( + dequantization1.subtract->get_input_node_shared_ptr(1), + opset1::Constant::create(ngraph::element::i32, { broadcastShape.size() }, broadcastShape)); + + // multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z] + const auto newSubConst = NetworkHelper::toScalarIfPossible(fold( + broadcastedConst, + fold(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()), + newMatMul->get_transpose_a(), + newMatMul->get_transpose_b())); + + const auto newSubtract = std::make_shared(newMatMul, newSubConst); + newSubtract->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationSubtract"); + ngraph::copy_runtime_info({ newSubtract, matMul }, newSubtract); + + parent = newSubtract; + } + auto transpose = [](const std::shared_ptr& node) -> std::shared_ptr { const Shape outputShape = node->get_output_shape(0); if (outputShape.size() < 2ul) { @@ -81,20 +104,35 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat return transposedConstant; }; - const std::shared_ptr const1 = matMul->get_transpose_a() ? + const auto mulConst1 = matMul->get_transpose_a() ? transpose(dequantization1.multiply->get_input_node_shared_ptr(1)) : dequantization1.multiply->get_input_node_shared_ptr(1); - const std::shared_ptr const2 = matMul->get_transpose_b() ? + auto mulConst2 = matMul->get_transpose_b() ? transpose(dequantization2.multiply->get_input_node_shared_ptr(1)) : dequantization2.multiply->get_input_node_shared_ptr(1); - const std::shared_ptr newMultiply = std::make_shared( - newMatMul, - NetworkHelper::toScalarIfPossible( - fold( - NetworkHelper::toScalar(as_type_ptr(const1)), - const2))); + if (NetworkHelper::isScalarLike(as_type_ptr(mulConst2))) { + mulConst2 = NetworkHelper::toScalar(as_type_ptr(mulConst2)); + } else { + auto constShape = mulConst2->get_shape(); + auto inputShape = matMul->get_input_shape(0); + + // unsqueeze from the left side to make both shapes of the same rank + if (constShape.size() < inputShape.size()) { + Shape unsqueezeConstantShape(inputShape.size() - constShape.size()); + std::iota(unsqueezeConstantShape.begin(), unsqueezeConstantShape.end(), 0ul); + + mulConst2 = fold( + mulConst2, + op::Constant::create(ngraph::element::i32, Shape{ unsqueezeConstantShape.size() }, unsqueezeConstantShape)); + } + } + + const auto newMulConst = NetworkHelper::toScalarIfPossible(fold(mulConst1, mulConst2)); + const std::shared_ptr newMultiply = std::make_shared(parent, newMulConst); + newMultiply->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationMultiply"); + replace_node(matMul, newMultiply); ngraph::copy_runtime_info({ newMultiply, matMul }, newMultiply); @@ -120,52 +158,80 @@ bool MatMulTransformation::isPrecisionPreserved(std::shared_ptr layer) con } bool MatMulTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { - if (!LayerTransformation::canBeTransformed(context, layer)) { + if (!LayerTransformation::canBeTransformedSpecialDimension(context, layer)) { return false; } - if (!canSubtractBeHandled(layer)) { + std::shared_ptr matMul = as_type_ptr(layer); + if (matMul == nullptr) { return false; } const auto dequantization1 = ngraph::pass::low_precision::NetworkHelper::getDequantization(layer); - if 
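// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch). The MatMul change above relies on
// (X - Z) * W == X * W - (Z_row * W): the activation zero point Z is broadcast to a
// single row, multiplied by the constant weights, and the product becomes the Subtract
// constant after the new MatMul. Plain C++ with made-up 2x3 / 3x2 values, not ngraph folding.
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<float>>;

Matrix matmul(const Matrix& a, const Matrix& b) {
    Matrix c(a.size(), std::vector<float>(b[0].size(), 0.0f));
    for (size_t i = 0; i < a.size(); ++i)
        for (size_t k = 0; k < b.size(); ++k)
            for (size_t j = 0; j < b[0].size(); ++j)
                c[i][j] += a[i][k] * b[k][j];
    return c;
}

int main() {
    const Matrix x = {{10, 20, 30}, {40, 50, 60}};  // activations, shape [2, 3]
    const Matrix w = {{1, 2}, {3, 4}, {5, 6}};      // constant weights, shape [3, 2]
    const std::vector<float> z = {1, 2, 3};         // zero point of x, broadcast shape [1, 3]

    // Reference order: subtract the zero point first, then run the MatMul.
    Matrix xMinusZ = x;
    for (auto& row : xMinusZ)
        for (size_t k = 0; k < row.size(); ++k)
            row[k] -= z[k];
    const Matrix reference = matmul(xMinusZ, w);

    // Transformed order: MatMul on the raw data, then subtract (Z_row * W), shape [1, 2].
    const Matrix zTimesW = matmul({z}, w);
    Matrix transformed = matmul(x, w);
    for (auto& row : transformed)
        for (size_t j = 0; j < row.size(); ++j)
            row[j] -= zTimesW[0][j];

    for (size_t i = 0; i < reference.size(); ++i)
        for (size_t j = 0; j < reference[0].size(); ++j)
            assert(std::fabs(reference[i][j] - transformed[i][j]) < 1e-5f);
    std::cout << "zero point folded through the weights: results match" << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------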
(!NetworkHelper::isScalarLike(as_type_ptr(dequantization1.multiply->get_input_node_shared_ptr(1)))) { - return false; - } + if (!dequantization1.empty()) { + if (updatePrecisions && !dequantization1.isLowPrecision()) { + return false; + } - if (updatePrecisions && !dequantization1.empty() && !dequantization1.isLowPrecision()) { - return false; + const auto mulConst = as_type_ptr(dequantization1.multiply->get_input_node_shared_ptr(1)); + if (!NetworkHelper::isScalarLike(mulConst)) { + const auto constantShape = mulConst->get_shape(); + const auto mulShape = dequantization1.multiply->get_shape(); + const size_t columnsIdx = matMul->get_transpose_a() ? mulShape.size() - 2ul : mulShape.size() - 1ul; + + // dequantization scales by columns in tensor A can't be propagated + if ((constantShape.size() == mulShape.size()) && (constantShape[columnsIdx] != 1)) { + return false; + } + } } - if (updatePrecisions) { - const auto dequantization2 = ngraph::pass::low_precision::NetworkHelper::getDequantization(layer, 1); - if (!dequantization2.empty() && !dequantization2.isLowPrecision()) { + const auto dequantization2 = ngraph::pass::low_precision::NetworkHelper::getDequantization(layer, 1); + if (!dequantization2.empty()) { + if ((updatePrecisions && !dequantization2.isLowPrecision()) || (dequantization2.subtract)) { return false; } + + const auto mulConst = as_type_ptr(dequantization2.multiply->get_input_node_shared_ptr(1)); + if (!NetworkHelper::isScalarLike(mulConst)) { + const auto constantShape = mulConst->get_shape(); + const auto mulShape = dequantization2.multiply->get_shape(); + const size_t rowsIdx = matMul->get_transpose_b() ? mulShape.size() - 1ul : mulShape.size() - 2ul; + + // dequantization scales by rows in tensor B can't be propagated + if ((constantShape.size() == mulShape.size()) && (constantShape[rowsIdx] != 1)) { + return false; + } + } } const auto fakeQuantize = as_type_ptr(layer->get_input_node_shared_ptr(1)); - if (fakeQuantize != nullptr) { + if (fakeQuantize) { if (!QuantizationDetails::outputLayoutIsSupported(fakeQuantize)) { return false; } - std::shared_ptr matMul = as_type_ptr(layer); - if (matMul == nullptr) { + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); + const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, true); + if (dataPrecision.hasZeroPoint) { return false; } - const size_t channelIndex1 = matMul->get_transpose_a() ? 0 : 1; - const size_t channelIndex2 = matMul->get_transpose_b() ? 1 : 0; - - // for MatMul with 3D input the channel is 3'rd dimension (not 2'nd) - const Shape input1 = layer->input(0).get_shape(); - const Shape input2 = layer->input(1).get_shape(); - if ((input1[channelIndex1] != input2[channelIndex2]) && - ((shape_size(dequantization1.multiply->input(1).get_shape()) > 1) || - (shape_size(fakeQuantize->input(3).get_shape()) > 1) || (shape_size(fakeQuantize->input(4).get_shape()) > 1))) { + + const auto outLowShape = fakeQuantize->get_input_node_shared_ptr(3)->get_shape(); + const auto outHighShape = fakeQuantize->get_input_node_shared_ptr(4)->get_shape(); + const auto fakeQuantizeShape = fakeQuantize->get_shape(); + const size_t rowsIdx = matMul->get_transpose_b() ?
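// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch). The shape checks above only accept
// dequantization scales that survive the reduction over the inner dimension: a scale
// per output column of the weights factors out of the MatMul unchanged, while a scale
// along the reduced axis stays inside the sum and cannot be moved below the operation.
// Plain C++ demonstration of the allowed case, with made-up values.
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<float>>;

Matrix matmul(const Matrix& a, const Matrix& b) {
    Matrix c(a.size(), std::vector<float>(b[0].size(), 0.0f));
    for (size_t i = 0; i < a.size(); ++i)
        for (size_t k = 0; k < b.size(); ++k)
            for (size_t j = 0; j < b[0].size(); ++j)
                c[i][j] += a[i][k] * b[k][j];
    return c;
}

int main() {
    const Matrix x = {{1, 2, 3}, {4, 5, 6}};      // [2, 3]
    const Matrix w = {{1, 2}, {3, 4}, {5, 6}};    // [3, 2]
    const std::vector<float> t = {0.5f, 0.25f};   // scale per output column of w

    // Scale the weights per output column, then run the MatMul.
    Matrix wScaled = w;
    for (auto& row : wScaled)
        for (size_t j = 0; j < row.size(); ++j)
            row[j] *= t[j];
    const Matrix reference = matmul(x, wScaled);

    // The same scale applied after the MatMul gives an identical result,
    // so the dequantization Multiply can be moved below the operation.
    Matrix moved = matmul(x, w);
    for (auto& row : moved)
        for (size_t j = 0; j < row.size(); ++j)
            row[j] *= t[j];

    for (size_t i = 0; i < reference.size(); ++i)
        for (size_t j = 0; j < reference[0].size(); ++j)
            assert(std::fabs(reference[i][j] - moved[i][j]) < 1e-5f);

    // A scale over the reduced axis (per column of x or per row of w) has no such
    // post-MatMul equivalent, which is why canBeTransformed() rejects it.
    std::cout << "per-output-column scale moved after MatMul: results match" << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------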
fakeQuantizeShape.size() - 1 : fakeQuantizeShape.size() - 2; + + // dequantization scales by rows in tensor B can't be propagated + if (((outLowShape.size() == fakeQuantizeShape.size()) && (outLowShape[rowsIdx] != 1)) || + ((outHighShape.size() == fakeQuantizeShape.size()) && (outHighShape[rowsIdx] != 1))) { return false; } } + if (fakeQuantize == nullptr && dequantization1.subtract) { + return false; + } + return true; } diff --git a/inference-engine/src/low_precision_transformations/src/max_pool.cpp b/inference-engine/src/low_precision_transformations/src/max_pool.cpp index de56563f9ff0a3..1204e7442ef3a6 100644 --- a/inference-engine/src/low_precision_transformations/src/max_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/max_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -47,7 +47,7 @@ bool MaxPoolTransformation::transform(TransformationContext& context, ngraph::pa return false; } - const std::shared_ptr pooling = separateInStandaloneBranch(m.get_match_root()); + const std::shared_ptr pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); moveDequantizationAfter(context, pooling, NetworkHelper::getDequantization(pooling), false); return true; } diff --git a/inference-engine/src/low_precision_transformations/src/multiply.cpp b/inference-engine/src/low_precision_transformations/src/multiply.cpp index c89605b07d3dff..992c63ceb90a6f 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -33,7 +33,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(multiply, 0)); NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(multiply, 1)); - multiply = separateInStandaloneBranch(multiply); + multiply = NetworkHelper::separateInStandaloneBranch(multiply); auto newMultiply = multiply; auto fold_fake_quantizes = [](std::shared_ptr& multiply, const size_t index) { @@ -52,8 +52,12 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p const int fullPathIndex = getNotEmpty(multiply); if (fullPathIndex == -1) { const auto multiplyBranch = getMultiplyConstBranch(multiply); + if (multiplyBranch.first != -1) { + NetworkHelper::foldDequantization(multiply, multiplyBranch.first == 0 ?
1 : 0); + } if (multiplyBranch.first == -1 || multiplyBranch.second == -1) { + // constant folding on dequantization ops (for example: Convert on Subtract) NetworkHelper::foldDequantization(multiply, 0); NetworkHelper::foldDequantization(multiply, 1); return false; @@ -90,6 +94,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p return false; } + dequantizationEmptyPath = NetworkHelper::foldDequantization(multiply, emptyPathIndex); std::shared_ptr subtractValuesEmptyPath; std::shared_ptr multiplyValuesEmptyPath; std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); @@ -99,6 +104,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p return false; } + dequantizationFullPath = NetworkHelper::foldDequantization(multiply, fullPathIndex); std::shared_ptr subtractValuesFullPath; std::shared_ptr multiplyValuesFullPath; std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath); diff --git a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp index d688241cdec047..2ecbd2658d3464 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -31,6 +31,9 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& } auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex); + if (dequantization.subtractConvert != nullptr) { + dequantization = NetworkHelper::foldDequantization(multiply, inputIndex); + } const element::Type weightsPrecision = updatePrecisions ? 
precisionsOnWeights[0] : dequantization.data.get_element_type(); @@ -88,8 +91,8 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& if (dequantization.subtract != nullptr) { lastNode = std::make_shared( convolution, - fold(fold(dequantization.subtract->get_input_node_shared_ptr(1), element::f32))); - lastNode->set_friendly_name(dequantization.subtract->get_friendly_name()); + fold(fold(dequantization.subtractConstant, element::f32))); + lastNode->set_friendly_name(convolution->get_friendly_name() + "/Add"); } lastNode = multiply->copy_with_new_inputs({ lastNode, constant }); diff --git a/inference-engine/src/low_precision_transformations/src/mvn.cpp b/inference-engine/src/low_precision_transformations/src/mvn.cpp index 382151438fe53f..cc6b64a298e8ed 100644 --- a/inference-engine/src/low_precision_transformations/src/mvn.cpp +++ b/inference-engine/src/low_precision_transformations/src/mvn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -79,7 +79,7 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter return false; } - auto mvn = as_type_ptr(separateInStandaloneBranch(operation)); + auto mvn = as_type_ptr(NetworkHelper::separateInStandaloneBranch(operation)); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(mvn); auto scalesConst = as_type_ptr(dequantization.multiply->get_input_node_shared_ptr(1)); diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 4292fe3fc67b9d..ab8a31484dbdf3 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -51,33 +51,39 @@ std::vector> NetworkHelper::consumers(std::shared_ptr layer) { - const std::vector> children = consumers(layer); - for (std::shared_ptr child : children) { - if ((is_type(child) || - is_type(child) || - is_type(child)) && - (child->inputs().size() >= 2lu)) { - const std::vector> parents = getParentsRecursivelyExceptTypes(child, {}, 1); - for (const std::shared_ptr& parent : parents) { - if (parent.get() == layer.get()) { - return 1; - } - } - return -1; +bool NetworkHelper::isConstantPath(const std::shared_ptr& op) { + const auto isNotConstantPathOperation = [](const std::shared_ptr& node) -> bool { + return is_type(node) || + is_type(node) || + is_type(node) || + is_type(node); + }; + + if (isNotConstantPathOperation(op)) { + return false; + } + + std::queue> inputs; + const std::vector> nodeInputs = op->inputs(); + for (const Input& nodeInput : nodeInputs) { + inputs.push(nodeInput); + } + + while (!inputs.empty()) { + Input input = inputs.front(); + inputs.pop(); + + const Output& sourceOutput = input.get_source_output(); + const auto parentNode = sourceOutput.get_node_shared_ptr(); + if (isNotConstantPathOperation(parentNode)) { + return false; } - const int result = onWeightsInDepth(child); - if (result != 0) { - return result; + for (size_t inputIndex = 0; inputIndex < parentNode->get_input_size(); ++inputIndex) { + inputs.push(parentNode->input(inputIndex)); } } - return 0; -} - -bool NetworkHelper::onWeights(std::shared_ptr layer) { - const int result = 
onWeightsInDepth(layer); - return result == 1; + return true; } size_t NetworkHelper::getOutputChannelsCount(std::shared_ptr layer, bool isOnWeights) { @@ -339,58 +345,137 @@ std::shared_ptr NetworkHelper::fold_fake_quantize(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace) { +FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace) { FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); if (dequantization.empty() || (dequantization.multiply == nullptr)) { - return; - } - - std::shared_ptr constant = as_type_ptr(dequantization.data.get_node_shared_ptr()); - if ((constant == nullptr) || (constant->output(0).get_target_inputs().size() != 1ul)) { - return; + return dequantization; } if (dequantization.convert != nullptr) { const std::shared_ptr result = fold(dequantization.data, dequantization.convert->get_element_type()); - if (!is_type(result)) { - return; - } - if (inPlace) { - copyInfo(dequantization.convert, result); + if (is_type(result)) { + if (inPlace) { + copyInfo(dequantization.convert, result); + } + replace_node(dequantization.convert, result); + dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); } - replace_node(dequantization.convert, result); - dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); } if (dequantization.subtract != nullptr) { - if (dequantization.data.get_element_type() != dequantization.subtract->input(1).get_element_type()) { - return; - } - const std::shared_ptr result = fold(dequantization.data, dequantization.subtract->get_input_node_shared_ptr(1)); - if (!is_type(result)) { - return; + if (dequantization.subtractConvert != nullptr) { + const auto convertionResult = fold( + dequantization.subtractConstant, + dequantization.subtractConvert->get_element_type()); + if (is_type(convertionResult)) { + replace_node(dequantization.subtractConvert, convertionResult); + dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); + } } - if (inPlace) { - copyInfo(dequantization.subtract, result); + + const std::shared_ptr result = fold( + dequantization.subtract->get_input_node_shared_ptr(0), + dequantization.subtract->get_input_node_shared_ptr(1)); + if (is_type(result)) { + if (inPlace) { + copyInfo(dequantization.subtract, result); + } + replace_node(dequantization.subtract, result); + dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); + } else { + return dequantization; } - replace_node(dequantization.subtract, result); - dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); } if (dequantization.multiply != nullptr) { - if (dequantization.data.get_element_type() != dequantization.multiply->input(1).get_element_type()) { - return; + const std::shared_ptr result = fold( + dequantization.multiply->get_input_node_shared_ptr(0), + dequantization.multiply->get_input_node_shared_ptr(1)); + if (is_type(result)) { + if (inPlace) { + copyInfo(dequantization.multiply, result); + } + replace_node(dequantization.multiply, result); + dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); + } else { + return dequantization; + } + } + + return dequantization; +} + +std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::shared_ptr node) { + FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(node); + if (dequantization.isShared()) { + Output parent 
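// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch). When the dequantization chain sits on a
// constant branch, foldDequantization() above collapses Convert, Subtract (whose own
// constant may hide behind a Convert) and Multiply into a single constant, so nothing
// is left to execute at runtime. Plain C++ arithmetic with made-up values.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const std::vector<uint8_t> data = {0, 64, 128, 255};  // constant branch stored as u8
    const uint8_t zeroPointU8 = 128;                       // Subtract constant kept in u8 behind a Convert
    const float scale = 0.02f;                             // Multiply constant

    // Folding produces one f32 constant: (convert(data) - convert(zeroPoint)) * scale.
    std::vector<float> folded(data.size());
    for (size_t i = 0; i < data.size(); ++i) {
        folded[i] = (static_cast<float>(data[i]) - static_cast<float>(zeroPointU8)) * scale;
    }

    for (const float value : folded) {
        std::cout << value << " ";  // -2.56 -1.28 0 2.54
    }
    std::cout << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------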
= dequantization.data; + if (dequantization.convert != nullptr) { + parent = dequantization.convert->clone_with_new_inputs({ parent }); + parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); } - const std::shared_ptr result = fold(dequantization.data, dequantization.multiply->get_input_node_shared_ptr(1)); - if (!is_type(result)) { - return; + + if (dequantization.subtract != nullptr) { + const auto parentOnWeights = dequantization.subtract->get_input_node_shared_ptr(1); + const std::vector> inputs = parentOnWeights->inputs(); + OutputVector outputs; + outputs.reserve(inputs.size()); + for (const auto& input : inputs) { + outputs.push_back(input.get_source_output()); + } + + parent = dequantization.subtract->clone_with_new_inputs({parent, parentOnWeights->clone_with_new_inputs(outputs) }); + parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); } - if (inPlace) { - copyInfo(dequantization.multiply, result); + + if (dequantization.multiply != nullptr) { + parent = dequantization.multiply->clone_with_new_inputs({ + parent, + dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) }); + parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); } - replace_node(dequantization.multiply, result); - dequantization = NetworkHelper::getDequantization(node, branchIndex, inPlace); + + std::vector> inputs = NetworkHelper::getInputs(node); + const size_t inputIndex = NetworkHelper::getChildInputIndex(dequantization.multiply, node); + inputs[inputIndex] = parent; + const std::shared_ptr newNode = node->clone_with_new_inputs(inputs); + + replace_node(node, newNode); + newNode->set_friendly_name(node->get_friendly_name()); + + return newNode; + } + + return node; +} + +std::shared_ptr NetworkHelper::fuseConvert(const std::shared_ptr& fakeQuantize) { + const Output output = fakeQuantize->output(0); + const auto targetInputs = output.get_target_inputs(); + if (targetInputs.size() != 1ul) { + return fakeQuantize; + } + + Node* node = targetInputs.begin()->get_node(); + if (!is_type(node) || + // TODO: LPT: avoid precision hardcode: to separate method: isSupportedPrecision + ((node->get_output_element_type(0) != element::u8) && (node->get_output_element_type(0) != element::i8))) { + return fakeQuantize; } + + + std::shared_ptr newFakeQuantize = std::make_shared>( + std::vector{ element::f32, element::f32, element::f32, element::f32, element::f32 }, + std::vector{}, + ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(0), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(1), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(2), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(3), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(4), element::f32).get(), + fakeQuantize->get_levels()); + NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0)); + replace_node(node->shared_from_this(), newFakeQuantize); + newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name()); + return newFakeQuantize; } std::shared_ptr NetworkHelper::foldFakeQuantize( @@ -505,6 +590,93 @@ std::shared_ptr NetworkHelper::foldFakeQuantize( return fq; } +std::shared_ptr 
NetworkHelper::composeFakeQuantize(const std::shared_ptr& fakeQuantize) { + std::shared_ptr parent = fakeQuantize; + auto targetInputs = parent->output(0).get_target_inputs(); + if (targetInputs.size() != 1ul) { + return nullptr; + } + if (is_type(targetInputs.begin()->get_node())) { + parent = targetInputs.begin()->get_node()->shared_from_this(); + } + + targetInputs = parent->output(0).get_target_inputs(); + if (targetInputs.size() != 1ul) { + return nullptr; + } + if (is_type(targetInputs.begin()->get_node())) { + parent = targetInputs.begin()->get_node()->shared_from_this(); + } + + targetInputs = parent->output(0).get_target_inputs(); + if (targetInputs.size() != 1ul) { + return nullptr; + } + if (is_type(targetInputs.begin()->get_node())) { + parent = targetInputs.begin()->get_node()->shared_from_this(); + } + + const std::shared_ptr prev = parent; + parent = parent->output(0).get_target_inputs().begin()->get_node()->shared_from_this(); + + const size_t index = NetworkHelper::getChildInputIndex(prev, parent); + const FakeQuantizeDequantization dequantization = getDequantization(parent, index); + if (dequantization.empty()) { + return nullptr; + } + + std::shared_ptr newFakeQuantize = fakeQuantize; + + if (dequantization.convert != nullptr) { + const std::shared_ptr replacement = std::make_shared>( + newFakeQuantize->input_value(0), + newFakeQuantize->input_value(1), + newFakeQuantize->input_value(2), + newFakeQuantize->input_value(3), + newFakeQuantize->input_value(4), + newFakeQuantize->get_levels(), + newFakeQuantize->get_auto_broadcast()); + replace_node(dequantization.convert, replacement); + replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, dequantization.convert->output(0).get_element_type()); + newFakeQuantize = replacement; + } + + if (dequantization.subtract != nullptr) { + const auto subtractValue = (dequantization.subtractConvert == nullptr) ? 
+ dequantization.subtractConstant : + fold(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type()); + + const std::shared_ptr replacement = std::make_shared>( + newFakeQuantize->input_value(0), + newFakeQuantize->input_value(1), + newFakeQuantize->input_value(2), + fold(newFakeQuantize->get_input_node_shared_ptr(3), subtractValue), + fold(newFakeQuantize->get_input_node_shared_ptr(4), subtractValue), + newFakeQuantize->get_levels(), + newFakeQuantize->get_auto_broadcast()); + replace_node(dequantization.subtract, replacement); + replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + newFakeQuantize = replacement; + } + + if (dequantization.multiply != nullptr) { + const std::shared_ptr replacement = std::make_shared>( + newFakeQuantize->input_value(0), + newFakeQuantize->input_value(1), + newFakeQuantize->input_value(2), + fold(newFakeQuantize->get_input_node_shared_ptr(3), dequantization.multiply->get_input_node_shared_ptr(1)), + fold(newFakeQuantize->get_input_node_shared_ptr(4), dequantization.multiply->get_input_node_shared_ptr(1)), + newFakeQuantize->get_levels(), + newFakeQuantize->get_auto_broadcast()); + replace_node(dequantization.multiply, replacement); + replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + newFakeQuantize = replacement; + } + + return newFakeQuantize; +} + // Decompose FakeQuantize to FakeQuantize with output integer limits (quantize), dequatized MultiplyAdd // To align types the resulting sequence is FakeQuantize -> Convert -> Convert -> MultiplyAdd std::tuple, std::shared_ptr> NetworkHelper::decomposeFakeQuantize( @@ -688,20 +860,22 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization( } std::shared_ptr subtract; + std::shared_ptr subtractConstant; if (std::abs(dequantizationSub) > 1e-6) { - subtract = std::make_shared>( - parent, - std::make_shared(originalPrecision, ngraph::Shape({}), std::vector({ dequantizationSub }))); + subtractConstant = std::make_shared(originalPrecision, ngraph::Shape({}), std::vector({ dequantizationSub })); + subtract = std::make_shared>(parent, subtractConstant); subtract->set_output_type(0, originalPrecision, subtract->get_output_partial_shape(0)); parent = subtract; } // mandatory - std::shared_ptr multiply = std::make_shared( - parent, - std::make_shared(originalPrecision, ngraph::Shape({}), std::vector({ dequantizationMul }))); + std::shared_ptr multiplyConstant = std::make_shared( + originalPrecision, + ngraph::Shape({}), + std::vector({ dequantizationMul })); + std::shared_ptr multiply = std::make_shared(parent, multiplyConstant); - return FakeQuantizeDequantization(input, convert, subtract, multiply); + return FakeQuantizeDequantization(input, convert, subtract, nullptr, subtractConstant, multiply, multiplyConstant); } FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize( @@ -722,15 +896,18 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize( // TODO: threshold values have to used here to avoid shifts - const std::shared_ptr scale = fold( + const std::shared_ptr scale = as_type_ptr(fold( fold(outputHigh, outputLow), - fold(newMax, newMin)); + fold(newMax, newMin))); + assert(scale != nullptr); - std::shared_ptr shift = hasZeroPoint ? 
- fold( + std::shared_ptr shift; + if (hasZeroPoint) { + shift = as_type_ptr(fold( fold(fold(newMin, outputHigh), fold(newMax, outputLow)), - fold(outputHigh, outputLow)) : - nullptr; + fold(outputHigh, outputLow))); + assert(shift != nullptr); + } if (shift != nullptr) { std::shared_ptr shiftConst = as_type_ptr(shift); @@ -765,32 +942,97 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize( } const std::shared_ptr multiply = std::make_shared(parent, scale); - return FakeQuantizeDequantization(fq, convert, subtract, multiply); + return FakeQuantizeDequantization(fq, convert, subtract, nullptr, shift, multiply, scale); } -FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr node, const size_t parentIndex, const bool inPlace) { +bool NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(const std::shared_ptr& node) { + if (!is_type(node)) { + return false; + } + + const auto targetInputs = node->output(0).get_target_inputs(); + if (targetInputs.size() != 1ul) { + return false; + } + + const auto multiply = targetInputs.begin()->get_node()->shared_from_this(); + return areQuantizeAndDequantizeSupportedForMultiply(multiply); +} + +bool NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(const std::shared_ptr& node) { + if (!is_type(node)) { + return false; + } + + const std::shared_ptr multiply = const_cast(node.get())->shared_from_this(); + const auto dequantization = ngraph::pass::low_precision::NetworkHelper::getDequantization(multiply, 0, true); + if (dequantization.empty()) { + return false; + } + + const auto dataNode = dequantization.data.get_node(); + if (is_type(dataNode)) { + const auto quantize = as_type_ptr(dataNode->get_input_node_shared_ptr(0)); + if (quantize == nullptr) { + return false; + } + + return NetworkHelper::isQuantizeSupported(quantize); + } else if (is_type(dataNode)) { + return true; + } + + return false; +} + +bool NetworkHelper::isQuantizeSupported(const std::shared_ptr& fakeQuantize) { + return QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels()); +} + +FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr& node, const size_t parentIndex, const bool inPlace) { auto getDataIndex = [](const std::shared_ptr& node) { if (is_type(node->get_input_node_ptr(1))) { return 0ul; - } else { + } + + if (is_type(node->get_input_node_ptr(1)) && is_type(node->get_input_node_ptr(1)->get_input_node_ptr(0))) { + return 0ul; + } + + if (is_type(node->get_input_node_ptr(0)) && is_type(node->get_input_node_ptr(0)->get_input_node_ptr(0))) { return 1ul; } + + return 1ul; }; Output dataNode = inPlace ? 
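// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch). createDequantizationFromFakeQuantize()
// above builds scale = (outputHigh - outputLow) / (newMax - newMin) and, when a zero
// point is required, shift = (newMin * outputHigh - newMax * outputLow) / (outputHigh - outputLow),
// so that (q - shift) * scale maps newMin back to outputLow and newMax back to outputHigh.
// Numeric check in plain C++ with a made-up u8-style interval.
#include <cassert>
#include <cmath>
#include <iostream>

int main() {
    const float newMin = 0.0f, newMax = 255.0f;           // integer levels after the quantize part
    const float outputLow = -1.28f, outputHigh = 1.27f;   // original FakeQuantize output interval

    const float scale = (outputHigh - outputLow) / (newMax - newMin);
    const float shift = (newMin * outputHigh - newMax * outputLow) / (outputHigh - outputLow);

    const auto dequantize = [&](float q) { return (q - shift) * scale; };

    // The end points of the integer range land back on the original interval.
    assert(std::fabs(dequantize(newMin) - outputLow) < 1e-5f);
    assert(std::fabs(dequantize(newMax) - outputHigh) < 1e-5f);
    std::cout << "scale = " << scale << ", shift = " << shift << std::endl;  // 0.01, 128
    return 0;
}
// ---------------------------------------------------------------------------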
node : node->input_value(parentIndex); const std::shared_ptr multiply = as_type_ptr(dataNode.get_node_shared_ptr()); + std::shared_ptr multiplyConstant; if (multiply != nullptr) { - if (!is_type(multiply->get_input_node_ptr(0)) && !is_type(multiply->get_input_node_ptr(1))) { + if (!FakeQuantizeDequantization::checkShape(multiply)) { + return FakeQuantizeDequantization(); + } + + FakeQuantizeDequantization::fillDequantizationParams(multiply, multiplyConstant); + if (multiplyConstant == nullptr) { return FakeQuantizeDequantization(); } dataNode = multiply->get_input_source_output(getDataIndex(multiply)); } const std::shared_ptr subtract = as_type_ptr(dataNode.get_node_shared_ptr()); + std::shared_ptr subtractConvert; + std::shared_ptr subtractConstant; if (subtract != nullptr) { - if (!is_type(subtract->get_input_node_ptr(0)) && !is_type(subtract->get_input_node_ptr(1))) { - return FakeQuantizeDequantization(dataNode, nullptr, nullptr, multiply); + if (!FakeQuantizeDequantization::checkShape(subtract)) { + return FakeQuantizeDequantization(dataNode, nullptr, nullptr, nullptr, nullptr, multiply, multiplyConstant); + } + + FakeQuantizeDequantization::fillDequantizationParams(subtract, subtractConvert, subtractConstant); + if (subtractConstant == nullptr) { + return FakeQuantizeDequantization(dataNode, nullptr, nullptr, nullptr, nullptr, multiply, multiplyConstant); } dataNode = subtract->get_input_source_output(getDataIndex(subtract)); } @@ -799,12 +1041,58 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt if (convert != nullptr) { if ((convert->input(0).get_element_type() != element::i8) && (convert->input(0).get_element_type() != element::u8) && (convert->output(0).get_element_type() != element::f32)) { - return FakeQuantizeDequantization(dataNode, nullptr, subtract, multiply); + return FakeQuantizeDequantization(dataNode, nullptr, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } dataNode = convert->get_input_source_output(0); } - return FakeQuantizeDequantization(dataNode, convert, subtract, multiply); + return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); +} + +FakeQuantizeDequantization NetworkHelper::getDequantizationBelow(const std::shared_ptr& node) { + const Output dataNode = node->output(0); + std::shared_ptr lastNode = dataNode.get_target_inputs().begin()->get_node()->shared_from_this(); + + const std::shared_ptr convert = as_type_ptr(lastNode); + if (convert != nullptr) { + if ((convert->input(0).get_element_type() != element::i8) && (convert->input(0).get_element_type() != element::u8) && + (convert->output(0).get_element_type() != element::f32)) { + return FakeQuantizeDequantization(); + } + + const auto& inputs = lastNode->output(0).get_target_inputs(); + if (inputs.size() != 1ul) { + return FakeQuantizeDequantization(); + } + lastNode = inputs.begin()->get_node()->shared_from_this(); + } + + const std::shared_ptr subtract = as_type_ptr(lastNode); + std::shared_ptr subtractConvert; + std::shared_ptr subtractConstant; + if (subtract != nullptr) { + FakeQuantizeDequantization::fillDequantizationParams(subtract, subtractConvert, subtractConstant); + if (subtractConstant == nullptr) { + return FakeQuantizeDequantization(); + } + + const auto& inputs = lastNode->output(0).get_target_inputs(); + if (inputs.size() != 1ul) { + return FakeQuantizeDequantization(); + } + lastNode = inputs.begin()->get_node()->shared_from_this(); + } + + const 
std::shared_ptr multiply = as_type_ptr(lastNode); + std::shared_ptr multiplyConstant; + if (multiply != nullptr) { + FakeQuantizeDequantization::fillDequantizationParams(multiply, multiplyConstant); + if (multiplyConstant == nullptr) { + return FakeQuantizeDequantization(); + } + } + + return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } FakeQuantizeDequantization NetworkHelper::normalizeDequantization(FakeQuantizeDequantization dequantization) { @@ -838,7 +1126,9 @@ FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuan std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 1.f })); std::shared_ptr subtract1Const = dequantization.subtract ? - dequantization.subtract->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) : + (dequantization.subtractConvert == nullptr ? + dequantization.subtractConstant->clone_with_new_inputs({}) : + fold(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) : std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 0.f })); subtract1Const->set_output_type(0, multiply1Const->get_output_element_type(0), subtract1Const->get_output_partial_shape(0)); @@ -881,29 +1171,51 @@ std::shared_ptr NetworkHelper::optimizeSubtract(std::shared_ptrinput_value(0); - auto shift = subtract->input_value(1).get_node_shared_ptr(); - auto roundedShift = NetworkHelper::round(shift, convertInputType); - - // Propagate convertInputType down - const auto replacement = std::make_shared>(data, roundedShift); - NetworkHelper::copyInfo(subtract, replacement); - NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType); - replace_node(subtract, replacement); - - // We lose the tail conversion here; not needed if the next node is a TypeRelaxed - // TODO: check cases when Convert should be preserved - - // Try to optimize Add out if constant is zero - // TODO: don't remove operation here: don't create this Subtraction operation in FQ decomposition - // if (isScalarLike(roundedShift)) { - // auto scalar = distillToScalar(roundedShift); - // if (op::util::constantIsEqualTo(scalar, 0)) { - // replace_node(replacement, replacement->input_value(0).get_node_shared_ptr()); - // replacement = nullptr; - // } - // } - - return replacement; + const auto subtractParent = subtract->get_input_node_shared_ptr(1); + if (is_type(subtractParent)) { + std::shared_ptr replacement; + + auto shift = subtract->input_value(1).get_node_shared_ptr(); + auto roundedShift = NetworkHelper::round(shift, convertInputType); + + if (isScalarLike(roundedShift)) { + roundedShift = toScalar(roundedShift); + if (op::util::constantIsEqualTo(roundedShift, 0)) { + replace_node(subtract, convertOnSubtract->get_input_node_shared_ptr(0)); + roundedShift = nullptr; + } + } + + if (roundedShift) { + // Propagate convertInputType down + replacement = std::make_shared>(data, roundedShift); + NetworkHelper::copyInfo(subtract, replacement); + NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType); + replace_node(subtract, replacement); + } + + // We lose the tail conversion here; not needed if the next node is a TypeRelaxed + // TODO: check cases when Convert should be preserved + + // Try to optimize Add out if constant is zero + // TODO: don't remove operation here: don't create this Subtraction operation in FQ decomposition + // if (isScalarLike(roundedShift)) { + // auto scalar = 
distillToScalar(roundedShift); + // if (op::util::constantIsEqualTo(scalar, 0)) { + // replace_node(replacement, replacement->input_value(0).get_node_shared_ptr()); + // replacement = nullptr; + // } + // } + + return replacement; + } else if (is_type(subtractParent) || is_type(subtractParent->get_input_node_shared_ptr(0))) { + auto replacement = std::make_shared>(data, subtractParent->get_input_node_shared_ptr(0)); + NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType); + replace_node(subtract, replacement); + return replacement; + } + + return subtract; } NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter( @@ -911,6 +1223,14 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter const FakeQuantizeDequantization& dequantization, const bool updatePrecision, const bool moveSubtract) { + assert( + (NetworkHelper::getDequantization(operation).subtractConstant == nullptr) || + (NetworkHelper::getDequantization(operation).subtractConstant.get() == dequantization.subtractConstant.get())); + + assert( + (NetworkHelper::getDequantization(operation).multiplyConstant == nullptr) || + (NetworkHelper::getDequantization(operation).multiplyConstant.get() == dequantization.multiplyConstant.get())); + std::vector> inputs(operation->get_input_size()); for (size_t i = 0; i < operation->get_input_size(); ++i) { inputs[i] = operation->get_input_node_shared_ptr(i); @@ -946,20 +1266,25 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter } if (moveSubtract && (dequantization.subtract != nullptr)) { - auto subtractConstant = dequantization.subtract->get_input_node_shared_ptr(1); - const element::Type parentPrecision = parent->get_output_element_type(0); - if (parentPrecision.bitwidth() < subtractConstant->output(0).get_element_type().bitwidth()) { - THROW_IE_LPT_EXCEPTION(*parent) << - "unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision << - ", subtract dequantization constant " << subtractConstant->get_friendly_name() << ":" << subtractConstant->output(0).get_element_type(); - } + if (dequantization.subtractConvert == nullptr) { + const element::Type parentPrecision = parent->get_output_element_type(0); + if (parentPrecision.bitwidth() < dequantization.subtractConstant->output(0).get_element_type().bitwidth()) { + THROW_IE_LPT_EXCEPTION(*parent) << + "unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision << + ", subtract dequantization constant " << dequantization.subtractConstant->get_friendly_name() << ":" << + dequantization.subtractConstant->output(0).get_element_type(); + } - parent = std::make_shared( - parent, - subtractConstant->output(0).get_element_type() == parentPrecision ? - subtractConstant : - fold(subtractConstant->output(0), parentPrecision)); - ngraph::copy_runtime_info({ newOperation, parent }, parent); + parent = std::make_shared( + parent, + dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ? 
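// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch). optimizeSubtract() above now rounds the
// zero point into the integer precision that feeds the Convert, so the Subtract can run
// on the quantized tensor, and drops the Subtract entirely when the rounded value is 0.
// Plain C++ with made-up values.
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
    const float shift = 127.6f;  // dequantization zero point in f32

    // Round the shift into the u8 input precision.
    const uint8_t roundedShift = static_cast<uint8_t>(std::round(shift));

    if (roundedShift == 0) {
        // Zero shift: the Subtract is removed and the Convert input is used directly.
        std::cout << "subtract removed" << std::endl;
    } else {
        // Non-zero shift: the Subtract stays, but now works on u8 data with a u8 constant.
        const uint8_t quantized = 200;
        const int difference = static_cast<int>(quantized) - static_cast<int>(roundedShift);
        std::cout << "u8 subtract: 200 - " << static_cast<int>(roundedShift)
                  << " = " << difference << std::endl;  // 200 - 128 = 72
    }
    return 0;
}
// ---------------------------------------------------------------------------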
+ dequantization.subtractConstant : + fold(dequantization.subtractConstant, parentPrecision)); + ngraph::copy_runtime_info({ newOperation, parent }, parent); + } else { + parent = std::make_shared(parent, dequantization.subtractConvert); + ngraph::copy_runtime_info({ newOperation, parent }, parent); + } } if (dequantization.multiply != nullptr) { @@ -989,19 +1314,6 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter return InsertDequantizationResult(newOperation, parent); } -void NetworkHelper::removeConvertIfPossible( - const std::shared_ptr& operation, - const FakeQuantizeDequantization& dequantization) { - const element::Type precisionBeforeConvert = dequantization.convert->input(0).get_element_type(); - - if (checkConstantValuePrecision(precisionBeforeConvert, dequantization.subtract->get_input_node_shared_ptr(1))) { - auto newSubtract = dequantization.subtract->clone_with_new_inputs({ - dequantization.convert->get_input_node_shared_ptr(0), - fold(dequantization.subtract->get_input_node_shared_ptr(1), precisionBeforeConvert) }); - replace_node(dequantization.subtract, newSubtract); - } -} - bool NetworkHelper::checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr& constant) { if (expectedPrecision.is_signed()) { return true; diff --git a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp index f96960078f514f..68d55727a35cf9 100644 --- a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp +++ b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -94,7 +94,7 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph return false; } - auto normalize = as_type_ptr(separateInStandaloneBranch(operation)); + auto normalize = as_type_ptr(NetworkHelper::separateInStandaloneBranch(operation)); const auto axes = as_type_ptr(normalize->get_input_node_shared_ptr(1)); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(normalize); diff --git a/inference-engine/src/low_precision_transformations/src/prelu.cpp b/inference-engine/src/low_precision_transformations/src/prelu.cpp index 4907206aa8262f..1c1a7c33218490 100644 --- a/inference-engine/src/low_precision_transformations/src/prelu.cpp +++ b/inference-engine/src/low_precision_transformations/src/prelu.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -28,7 +28,7 @@ bool PReluTransformation::transform(TransformationContext& context, ngraph::patt return false; } - prelu = separateInStandaloneBranch(prelu); + prelu = NetworkHelper::separateInStandaloneBranch(prelu); const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(prelu, 0); moveDequantizationAfter(context, prelu, dequantization, false, false); return true; diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp index 635650482c1899..ffaba7f5c5951d 100644 --- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp +++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp @@ -1,4 +1,4 @@ -// Copyright 
(C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,56 +22,6 @@ namespace ngraph { namespace pass { namespace low_precision { -#if 0 // TODO LPT-TO-NGRAPH - -class ConstTensorDesc { -public: - static void validate(const Layout layout, const std::vector& dims) { - switch (layout) { - case Layout::SCALAR: { - if (dims.size() != 0) { - THROW_TRANSFORMATION_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout; - } - break; - } - case Layout::C: { - if (dims.size() != 1) { - THROW_TRANSFORMATION_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout; - } - break; - } - case Layout::NCHW: { - if (dims.size() != 4) { - THROW_TRANSFORMATION_EXCEPTION << "unexpected dimensions size " << dims.size() << " for layout " << layout; - } - break; - } - default: { - THROW_TRANSFORMATION_EXCEPTION << "unexpected layout " << layout; - } - } - } - - static size_t getChannelsCount(const Layout layout, const std::vector& dims) { - switch (layout) { - case Layout::SCALAR: { - return 1; - } - case Layout::C: { - return dims[0]; - } - case Layout::NCHW: { - return dims[1]; - } - default: { - THROW_TRANSFORMATION_EXCEPTION << "unexpected layout " << layout; - } - } - } -}; - -#endif - QuantizationDetails::QuantizationDetails() : levels(), inputLowValues({}), @@ -114,18 +64,15 @@ bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr outputLowValues; - std::vector outputHighValues; - size_t outputIntervalsCount; - getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount); - - // TODO: FQ on weights - temporary workaround: - // if (outputIntervalsCount == quantize->get_output_shape(0)[0]) { - // return true; - // } + const size_t inputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(1))->cast_vector().size(); + const size_t inputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(2))->cast_vector().size(); + if (inputLowValuesSize != inputHighValuesSize) { + return false; + } - const size_t outputChannelsCount = NetworkHelper::getOutputChannelsCount(quantize, NetworkHelper::onWeights(quantize)); - if ((outputIntervalsCount != 1ul) && (outputIntervalsCount != outputChannelsCount)) { + const size_t outputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(3))->cast_vector().size(); + const size_t outputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(4))->cast_vector().size(); + if (outputLowValuesSize != outputHighValuesSize) { return false; } @@ -189,7 +136,7 @@ QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr& resha bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { std::shared_ptr reshape = as_type_ptr(m.get_match_root()); - if ((reshape == nullptr) || (!canBeTransformed(context, reshape))) { + if (NetworkHelper::isConstantPath(reshape)) { return false; } - reshape = as_type_ptr(separateInStandaloneBranch(reshape)); + if (!canBeTransformed(context, reshape)) { + return false; + } + + reshape = as_type_ptr(NetworkHelper::separateInStandaloneBranch(reshape)); reshapeDequantizationConstant(reshape); moveDequantizationAfter(context, reshape, NetworkHelper::getDequantization(reshape, 0), false); return true; diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index d6ed88a4329713..c10d840b980753 100644 --- 
a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,7 +22,7 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt return false; } - const std::shared_ptr split = separateInStandaloneBranch(m.get_match_root()); + const std::shared_ptr split = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); auto dequantization = NetworkHelper::getDequantization(split); OutputVector inputs(split->get_input_size()); diff --git a/inference-engine/src/low_precision_transformations/src/squeeze.cpp b/inference-engine/src/low_precision_transformations/src/squeeze.cpp index cfb16eae29f74d..3a5d299de8bfa1 100644 --- a/inference-engine/src/low_precision_transformations/src/squeeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/squeeze.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -39,7 +39,7 @@ bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pa return dequantizationOpConstant; }; - const std::shared_ptr squeeze = separateInStandaloneBranch(m.get_match_root()); + const std::shared_ptr squeeze = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(squeeze); if (dequantization.multiply != nullptr) { diff --git a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp index 398eae9d2b839e..2acc5bb05b270d 100644 --- a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp +++ b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp @@ -68,7 +68,7 @@ bool StridedSliceTransformation::transform(TransformationContext& context, ngrap return false; } - const auto stridedSlice = separateInStandaloneBranch(m.get_match_root()); + const auto stridedSlice = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); const auto dequantization = NetworkHelper::getDequantization(stridedSlice); if (dequantization.subtract) { @@ -85,7 +85,7 @@ bool StridedSliceTransformation::transform(TransformationContext& context, ngrap const auto newMulConst = stridedSliceDeqConstant(stridedSlice, mulConst); dequantization.multiply->set_argument(mulConstIdx, newMulConst); - moveDequantizationAfter(context, stridedSlice, dequantization, false); + moveDequantizationAfter(context, stridedSlice, NetworkHelper::getDequantization(stridedSlice), false); return true; } diff --git a/inference-engine/src/low_precision_transformations/src/subgraph.cpp b/inference-engine/src/low_precision_transformations/src/subgraph.cpp index d73d780ae0242b..aa0fdf675355a3 100644 --- a/inference-engine/src/low_precision_transformations/src/subgraph.cpp +++ b/inference-engine/src/low_precision_transformations/src/subgraph.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -112,6 +112,19 @@ bool Subgraph::atLeastOneIsIntermediate(const std::shared_ptr& nod return false; } +std::shared_ptr getFakeQuantize(const FakeQuantizeDequantization& dequantization) { + std::shared_ptr node = dequantization.data.get_node_shared_ptr(); + 
std::shared_ptr fakeQuantize = ngraph::as_type_ptr(node); + if (fakeQuantize != nullptr) { + return fakeQuantize; + } + + if (is_type(node)) { + fakeQuantize = ngraph::as_type_ptr(node->get_input_node_shared_ptr(0)); + } + return fakeQuantize; +} + bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_set& handledLayers) { // if at least one parent is handled incorrectly then subgraph is not in low precision for (size_t index = 0; index < layer->get_input_size(); ++index) { @@ -126,7 +139,14 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s return false; } } else { - const std::shared_ptr fakeQuantizeParent = ngraph::as_type_ptr(parent); + // WA: issue #46906 + if (parent->get_output_size() != 1ul) { + return false; + } + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(parent, 0, true); + const std::shared_ptr fakeQuantizeParent = dequantization.empty() ? + ngraph::as_type_ptr(parent) : + getFakeQuantize(dequantization); if (fakeQuantizeParent != nullptr) { if (!fillSubgraphForQuantization(fakeQuantizeParent, handledLayers)) { // diff --git a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp index be7a3f4e0b59c7..a8565131e5cc46 100644 --- a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp +++ b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -27,7 +27,9 @@ FakeQuantizeDequantization get(const std::shared_ptr node) { dataNode.get_node_shared_ptr()->get_input_node_shared_ptr(1)) ? as_type_ptr(dataNode.get_node_shared_ptr()) : nullptr; + std::shared_ptr multiplyConstant; if (multiply != nullptr) { + FakeQuantizeDequantization::fillDequantizationParams(multiply, multiplyConstant); dataNode = multiply->get_input_source_output(0); } @@ -35,7 +37,10 @@ FakeQuantizeDequantization get(const std::shared_ptr node) { && is_type(dataNode.get_node_shared_ptr()->get_input_node_ptr(1)) ? as_type_ptr(dataNode.get_node_shared_ptr()) : nullptr; + std::shared_ptr subtractConvert; + std::shared_ptr subtractConstant; if (subtract != nullptr) { + FakeQuantizeDequantization::fillDequantizationParams(subtract, subtractConvert, subtractConstant); dataNode = subtract->get_input_source_output(0); } @@ -44,7 +49,7 @@ FakeQuantizeDequantization get(const std::shared_ptr node) { dataNode = convert->get_input_source_output(0); } - return FakeQuantizeDequantization(dataNode, convert, subtract, multiply); + return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { @@ -53,7 +58,7 @@ bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContex return false; } - multiply = separateInStandaloneBranch(multiply); + multiply = NetworkHelper::separateInStandaloneBranch(multiply); FakeQuantizeDequantization dequantization = get(multiply); const element::Type precisionBeforeDequantization = dequantization.convert == nullptr ? 
diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index 440d7dced270a4..f9bd681bd99ce1 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -24,6 +24,9 @@ #include "low_precision/concat.hpp" #include "low_precision/concat_multi_channels.hpp" +// decomposition transformations +#include "low_precision/fake_quantize_decomposition.hpp" + // general transformations #include "low_precision/add.hpp" #include "low_precision/avg_pool.hpp" @@ -51,8 +54,8 @@ #include "low_precision/split.hpp" // cleanup transformations -#include "low_precision/convert.hpp" #include "low_precision/fuse_convert.hpp" +#include "low_precision/fold_convert.hpp" #include "low_precision/fuse_fake_quantize.hpp" #include "low_precision/fuse_subtract_to_fake_quantize.hpp" #include "low_precision/fuse_multiply_to_fake_quantize.hpp" @@ -132,6 +135,7 @@ std::vector LowPrecisionTransformations::find(const std: void LowPrecisionTransformations::setParamsManager(IParamsManager* paramsManager) noexcept { setParamsManager(paramsManager, branchSpecificTransformations); + setParamsManager(paramsManager, decompositionTransformations); setParamsManager(paramsManager, transformations); setParamsManager(paramsManager, cleanupTransformations); setParamsManager(paramsManager, standaloneCleanupTransformations); @@ -139,6 +143,7 @@ void LowPrecisionTransformations::setParamsManager(IParamsManager* paramsManager void LowPrecisionTransformations::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept { setLayerTransformationsManager(layerTransformationsManager, branchSpecificTransformations); + setLayerTransformationsManager(layerTransformationsManager, decompositionTransformations); setLayerTransformationsManager(layerTransformationsManager, transformations); setLayerTransformationsManager(layerTransformationsManager, cleanupTransformations); setLayerTransformationsManager(layerTransformationsManager, standaloneCleanupTransformations); @@ -202,6 +207,8 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const auto transformer = LowPrecisionTransformations(). addBranchSpecific(params). + addDecomposition(params). + add(params). add(params). add(params). @@ -224,6 +231,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const add(params). add(params). + addCleanup(params). addCleanup(params). addStandaloneCleanup(params). 
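Note: the registration above makes FakeQuantize decomposition a dedicated pipeline stage instead of a single hard-coded pass. A minimal sketch of the intended builder usage, assuming the LowPrecisionTransformations interface shown in this patch (the template arguments and params setup are illustrative only, not taken from the patch text):

    using namespace ngraph;
    using namespace ngraph::pass::low_precision;

    LayerTransformation::Params params;
    auto transformations = LowPrecisionTransformations()
        .addBranchSpecific<ConcatMultiChannelsTransformation, opset1::Concat>(params)
        // decomposition now runs as its own step #1, splitting FakeQuantize into quantize/dequantize parts
        .addDecomposition<FakeQuantizeDecompositionTransformation, opset1::FakeQuantize>(params)
        .add<AddTransformation, opset1::Add>(params)
        .addCleanup<FoldConvertTransformation, opset1::Subtract>(params)
        .addStandaloneCleanup<FuseSubtractToFakeQuantizeTransformation, opset1::Subtract>(params);
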
@@ -357,13 +365,9 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { } { - // Step #1: FakeQuantize layer transformation execution - LayerTransformationPtr fqTransformation = transformations.find()[0]; - if (fqTransformation == nullptr) { - THROW_TRANSFORMATION_EXCEPTION << "FakeQuantize transformation was not found"; - } + // Step #1: FakeQuantize decomposition transformation execution GraphRewrite pass; - fqTransformation->registerMatcherIn(pass, context); + registerAllMatchers(transformations.decompositionTransformations, pass, context); pass.run_on_function(network); } diff --git a/inference-engine/src/low_precision_transformations/src/transpose.cpp b/inference-engine/src/low_precision_transformations/src/transpose.cpp index f164fa85d7a97a..b41beefa944dcc 100644 --- a/inference-engine/src/low_precision_transformations/src/transpose.cpp +++ b/inference-engine/src/low_precision_transformations/src/transpose.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -80,7 +80,7 @@ bool TransposeTransformation::transform(TransformationContext& context, ngraph:: return false; } - transpose = separateInStandaloneBranch(transpose); + transpose = NetworkHelper::separateInStandaloneBranch(transpose); transposeDequantizationConstant(transpose); moveDequantizationAfter(context, transpose, NetworkHelper::getDequantization(transpose, 0), false); return true; @@ -102,16 +102,13 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(op); const bool isPerTensor = [&] { - const auto sub = dequantization.subtract; - const auto mul = dequantization.multiply; - if (sub) { - auto subConst = as_type_ptr(sub->get_input_node_shared_ptr(1)); - if (!NetworkHelper::isScalarLike(subConst)) { + if (dequantization.subtractConstant != nullptr) { + if (!NetworkHelper::isScalarLike(dequantization.subtractConstant)) { return false; } } - if (mul) { - auto mulConst = as_type_ptr(mul->get_input_node_shared_ptr(1)); + if (dequantization.multiply != nullptr) { + const auto mulConst = as_type_ptr(dequantization.multiplyConstant); if (!NetworkHelper::isScalarLike(mulConst)) { return false; } @@ -126,7 +123,7 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont } } - auto checkConstant = [](const std::shared_ptr& dequantizationConstant, const Shape& transposeOutputShape) -> bool { + auto checkShape = [](const std::shared_ptr& dequantizationConstant, const Shape& transposeOutputShape) -> bool { const auto dequantizationShape = dequantizationConstant->get_output_shape(0); if (dequantizationShape.empty() || (dequantizationShape.size() == 1ul) || (dequantizationShape.size() == transposeOutputShape.size())) { return true; @@ -141,8 +138,8 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont return !dequantization.empty() && - ((dequantization.subtract == nullptr) || checkConstant(dequantization.subtract->get_input_node_shared_ptr(1), op->get_output_shape(0))) && - ((dequantization.multiply == nullptr) || checkConstant(dequantization.multiply->get_input_node_shared_ptr(1), op->get_output_shape(0))); + ((dequantization.subtract == nullptr) || checkShape(dequantization.subtract->get_input_node_shared_ptr(1), op->get_output_shape(0))) && + ((dequantization.multiply == nullptr) || checkShape(dequantization.multiply->get_input_node_shared_ptr(1), 
op->get_output_shape(0))); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp index c1dd9592e50a39..d1bc2df60fdc97 100644 --- a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -39,7 +39,7 @@ bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph:: return dequantizationOpConstant; }; - const std::shared_ptr unsqueeze = separateInStandaloneBranch(m.get_match_root()); + const std::shared_ptr unsqueeze = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(unsqueeze); if (dequantization.multiply != nullptr) { diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index 428dbba640256b..ab475794f7b84a 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -88,55 +88,125 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext // TODO Implement similar checks in other weightable operaitons - std::shared_ptr reshapeFromWeights = as_type_ptr(layer->input_value(1).get_node_shared_ptr()); - std::shared_ptr fqFromWeights = as_type_ptr( - reshapeFromWeights == nullptr ? 
- layer->input_value(1).get_node_shared_ptr() : - layer->get_input_node_ptr(1)->get_input_node_shared_ptr(0)); + const std::shared_ptr reshapeFromWeights = as_type_ptr(layer->input_value(1).get_node_shared_ptr()); - if ((fqFromWeights == nullptr) || (fqFromWeights->get_input_size() != 5ul)) { - return false; + std::shared_ptr fqFromWeights; + if (reshapeFromWeights == nullptr) { + fqFromWeights = as_type_ptr(layer->input_value(1).get_node_shared_ptr()); + if (fqFromWeights == nullptr) { + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul); + fqFromWeights = as_type_ptr(dequantization.data.get_node_shared_ptr()); + } + } else { + fqFromWeights = as_type_ptr(reshapeFromWeights->get_input_node_shared_ptr(0)); + if (fqFromWeights == nullptr) { + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(reshapeFromWeights, 0ul); + fqFromWeights = as_type_ptr(dequantization.data.get_node_shared_ptr()); + } } - const Shape constOutputShape = fqFromWeights->get_input_node_ptr(3)->get_output_shape(0); - if (fqFromWeights->get_input_node_ptr(4)->get_output_shape(0) != constOutputShape) { - return false; - } + if (fqFromWeights != nullptr) { + if ((!NetworkHelper::isQuantizeSupported(fqFromWeights)) || (fqFromWeights->get_input_size() != 5ul)) { + return false; + } - // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones - if ((shape_size(constOutputShape) != constOutputShape[0]) || - ((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) { - return false; + const Shape constOutputShape = fqFromWeights->get_input_node_ptr(3)->get_output_shape(0); + if (fqFromWeights->get_input_node_ptr(4)->get_output_shape(0) != constOutputShape) { + return false; + } + + if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones + (shape_size(constOutputShape) != constOutputShape[0]) || + ((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) { + return false; + } + } else { + // TODO: LPT: is it possible to share with isQuantized? + const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ? 
+ NetworkHelper::getDequantization(layer, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights, 0ul); + if (dequantizationOnWeights.empty()) { + return false; + } + + const opset1::Constant* weightsData = as_type(dequantizationOnWeights.data.get_node()); + if (weightsData == nullptr) { + return false; + } + + const ngraph::element::Type weightsDataPrecision = weightsData->output(0).get_element_type(); + if (!DataPrecision::isSupported(weightsDataPrecision)) { + return false; + } + + if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) { + const auto subtractConstantType = dequantizationOnWeights.subtractConstant->output(0).get_element_type(); + if (subtractConstantType != weightsDataPrecision) { + return false; + } + } } return true; } -bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, bool isReshape) const noexcept { - auto isFakeQuantize = [](std::shared_ptr layer) { - std::string opName = layer->get_type_name(); - return opName == "FakeQuantize"; - }; - - auto parentOnWeights = layer->get_input_node_shared_ptr(1); - std::string operationName = parentOnWeights->get_type_name(); - if (isReshape) { - if (operationName != "Reshape") { +bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept { + FakeQuantizeDequantization dequantizationOnWeights; + if (reshapeIsRequired) { + const auto reshape = layer->get_input_node_shared_ptr(1); + if (!is_type(reshape)) { return false; } - parentOnWeights = parentOnWeights->get_input_node_shared_ptr(0); - return isFakeQuantize(parentOnWeights); + + if (is_type(reshape->get_input_node_shared_ptr(0))) { + const std::shared_ptr fq = as_type_ptr(reshape->get_input_node_shared_ptr(0)); + return NetworkHelper::isQuantizeSupported(fq); + } + + dequantizationOnWeights = NetworkHelper::getDequantization(reshape, 0); + } else if (is_type(layer->get_input_node_shared_ptr(1))) { + const std::shared_ptr fq = as_type_ptr(layer->get_input_node_shared_ptr(1)); + return NetworkHelper::isQuantizeSupported(fq); } else { - return isFakeQuantize(parentOnWeights); + dequantizationOnWeights = NetworkHelper::getDequantization(layer, 1); + } + + if (dequantizationOnWeights.empty()) { + return false; + } + + // TODO: LPT: is it possible to share with canBeTransformed? 
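Note: for reference, the weights branch these checks are written to recognize is an i8 constant wrapped in a dequantization subgraph. A minimal sketch, assuming ngraph/opsets/opset1.hpp, with shapes and values chosen purely for illustration:

    auto weights   = ngraph::opset1::Constant::create(ngraph::element::i8, ngraph::Shape{6, 3, 1, 1}, {0});
    auto convert   = std::make_shared<ngraph::opset1::Convert>(weights, ngraph::element::f32);
    auto zeroPoint = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {64.f});
    auto subtract  = std::make_shared<ngraph::opset1::Subtract>(convert, zeroPoint);   // optional zero-point branch
    auto scale     = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, {0.02f});
    auto dequantizedWeights = std::make_shared<ngraph::opset1::Multiply>(subtract, scale);

getDequantization is then expected to report the i8 constant as dequantizationOnWeights.data, which is what the precision checks operate on.
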
+ if (is_type(dequantizationOnWeights.data.get_node())) { + const ngraph::element::Type weightsDataPrecision = dequantizationOnWeights.data.get_element_type(); + if (!DataPrecision::isSupported(weightsDataPrecision)) { + return false; + } + + if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) { + const auto subtractConstantType = dequantizationOnWeights.subtractConstant->output(0).get_element_type(); + if (subtractConstantType != weightsDataPrecision) { + return false; + } + } + + return true; + } else if (is_type(dequantizationOnWeights.data.get_node())) { + return true; } + + return false; } bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { return false; } -DataPrecision WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr node) const { +void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr node) const { const auto fq = getFakeQuantizeOnWeights(node); + if (fq == nullptr) { + return; + } + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, true); auto tuple = NetworkHelper::decomposeFakeQuantize( @@ -151,8 +221,6 @@ DataPrecision WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath if (as_type_ptr(fqOnWeights) == nullptr) { THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant"; } - - return dataPrecision; } bool WeightableLayerTransformation::isGroup(const std::shared_ptr& layer) { diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index fc4c03aeb0be3b..a3b1d18559fad8 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -51,7 +51,8 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder_seq_len.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/depth_to_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput.cpp @@ -99,6 +100,7 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/jit_load_store_emitters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp @@ -161,6 +163,14 @@ ie_add_plugin(NAME ${TARGET_NAME} set_ie_threading_interface_for(${TARGET_NAME}) +if(SELECTIVE_BUILD STREQUAL "ON") + # After disabling a block of code, some variables might be unused. 
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" + OR CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + target_compile_options(${TARGET_NAME} PRIVATE -Wno-unused-variable) + endif() +endif() + target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine inference_engine_legacy inference_engine_transformations inference_engine_lp_transformations openvino::conditional_compilation) diff --git a/inference-engine/src/mkldnn_plugin/bf16transformer.h b/inference-engine/src/mkldnn_plugin/bf16transformer.h index 947f6ebab02d1b..811c0fa5bd43a6 100644 --- a/inference-engine/src/mkldnn_plugin/bf16transformer.h +++ b/inference-engine/src/mkldnn_plugin/bf16transformer.h @@ -14,7 +14,8 @@ namespace MKLDNNPlugin { class BF16Transformer { const InferenceEngine::details::caseless_set _initbf16 = - { "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate", "PSROIPooling" }; + { "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate", "PSROIPooling", "Deconvolution" }; + const InferenceEngine::details::caseless_set _complementbf16 = { "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "normalize", "sigmoid", "ReLU6", "not", "activation", "HSwish", "mish", "logistic", "mod", "resample", diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp index 1e5db742d80ef7..79c2b045d2bc24 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp @@ -8,7 +8,9 @@ MKLDNNPlugin::MKLDNNAsyncInferRequest::MKLDNNAsyncInferRequest(const InferenceEngine::InferRequestInternal::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) - : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor) {} + : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor) { + static_cast(inferRequest.get())->SetAsyncRequest(this); +} void MKLDNNPlugin::MKLDNNAsyncInferRequest::Infer_ThreadUnsafe() { InferUsingAsync(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 80643e689a50e5..a6c53f219176bd 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -20,6 +20,7 @@ #include "mkldnn_extension_mngr.h" #include "mkldnn_memory_solver.hpp" #include "mkldnn_itt.h" +#include "mkldnn_infer_request.h" #include #include @@ -755,7 +756,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { } } -void MKLDNNGraph::Infer(int batch) { +void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { if (!IsReady()) { THROW_IE_EXCEPTION << "Wrong state. 
Topology is not ready."; } @@ -763,9 +764,8 @@ void MKLDNNGraph::Infer(int batch) { mkldnn::stream stream(eng); for (int i = 0; i < graphNodes.size(); i++) { - if (IsCancellationRequested()) { - ResetCancellationRequest(); - THROW_IE_EXCEPTION << InferenceEngine::details::as_status << InferenceEngine::INFER_CANCELLED; + if (request != nullptr) { + request->ThrowIfCanceled(); } PERF(graphNodes[i]); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index dee26a4f142ef4..f35a2135a88e50 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -19,7 +19,7 @@ #include namespace MKLDNNPlugin { - +class MKLDNNInferRequest; class MKLDNNGraph { public: typedef std::shared_ptr Ptr; @@ -30,7 +30,7 @@ class MKLDNNGraph { Ready = 1, }; - MKLDNNGraph(mkldnn::engine eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0)) : status(NotReady), eng(eng), cancelation_requested(false) {} + MKLDNNGraph(mkldnn::engine eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0)) : status(NotReady), eng(eng) {} Status GetStatus() { return status; @@ -40,10 +40,6 @@ class MKLDNNGraph { return (GetStatus() == Ready); } - void Cancel() { - cancelation_requested.store(true); - } - void setConfig(const Config &cfg); void setProperty(const std::map &properties); Config getProperty(); @@ -63,7 +59,7 @@ class MKLDNNGraph { void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in); void PullOutputData(InferenceEngine::BlobMap &out); - void Infer(int batch = -1); + void Infer(MKLDNNInferRequest* request = nullptr, int batch = -1); std::vector& GetNodes() { return graphNodes; @@ -129,14 +125,6 @@ class MKLDNNGraph { void SortTopologically(); protected: - bool IsCancellationRequested() const { - return cancelation_requested.load(); - } - - void ResetCancellationRequest() { - cancelation_requested.store(false); - } - void VisitNode(MKLDNNNodePtr node, std::vector& sortedNodes); void ForgetGraphData() { @@ -198,8 +186,6 @@ class MKLDNNGraph { InferenceEngine::CNNLayerPtr cnnLayer; size_t outIdx; }; - - std::atomic cancelation_requested; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index bc0ade25d957da..46d32b9981d8fe 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -216,7 +216,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return true; }; - auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0) { + auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0, MKLDNNNodePtr parent1) { auto* convNode = dynamic_cast(node.get()); if (convNode == nullptr) THROW_IE_EXCEPTION << "Cannot get convolution node " << node->getName(); @@ -225,6 +225,14 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; if (parent0->getType() == Eltwise) { + // The plug-in doesn't support FP32 convolution with input/weights zero points. + // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use + // INT8 implementation so we have to disable input zero points fusing as well. 
+ auto weightsLayer = parent1->getCnnLayer(); + if (!weightsLayer || weightsLayer->type != "Const" || weightsLayer->outData[0]->getPrecision() != Precision::I8) { + return false; + } + auto* eltwiseNode = dynamic_cast(parent0.get()); if (eltwiseNode->getOpType() != Subtract) return false; @@ -395,7 +403,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (!isSutableConvNode(conv)) continue; auto dataEltwise = conv->getParentEdgesAtPort(0)[0]->getParent(); - if (initializeInputZeroPoints(conv, dataEltwise)) { + auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); + if (initializeInputZeroPoints(conv, dataEltwise, weightsEltwise)) { auto p_edge = dataEltwise->getParentEdgesAtPort(1)[0]; removeEdge(graph, p_edge); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index be09a3dd335e87..804d7e6b11c37c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -17,6 +17,7 @@ #include "mkldnn_memory_state.h" #include "nodes/mkldnn_memory_node.hpp" #include "nodes/common/cpu_memcpy.h" +#include "mkldnn_async_infer_request.h" MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, @@ -30,13 +31,11 @@ MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsData THROW_IE_EXCEPTION << "No graph was found"; graph = execNetwork->_graphs.begin()->get(); for (const auto& it : _networkInputs) { - InferenceEngine::Blob::Ptr blob; - MKLDNNInferRequest::GetBlob(it.first.c_str(), blob); + MKLDNNInferRequest::GetBlob(it.first); } // Allocate all output blobs for (const auto& it : _networkOutputs) { - InferenceEngine::Blob::Ptr blob; - MKLDNNInferRequest::GetBlob(it.first.c_str(), blob); + MKLDNNInferRequest::GetBlob(it.first); } // Save all MemoryLayer data tensors. 
Will use insight about mechanics @@ -178,43 +177,47 @@ void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() { graph = execNetwork->_graphs.local().get(); + ThrowIfCanceled(); + execDataPreprocessing(_inputs); changeDefaultPtr(); + ThrowIfCanceled(); + PushInputData(); if (memoryStates.size() != 0) { PushStates(); } - graph->Infer(m_curBatch); + graph->Infer(this, m_curBatch); if (memoryStates.size() != 0) { PullStates(); } - graph->PullOutputData(_outputs); -} + ThrowIfCanceled(); -InferenceEngine::StatusCode MKLDNNPlugin::MKLDNNInferRequest::Cancel() { - graph->Cancel(); - return InferenceEngine::OK; + graph->PullOutputData(_outputs); } -void MKLDNNPlugin::MKLDNNInferRequest::GetPerformanceCounts( - std::map &perfMap) const { +std::map MKLDNNPlugin::MKLDNNInferRequest::GetPerformanceCounts() const { if (!graph || !graph->IsReady()) THROW_IE_EXCEPTION << "Graph is not ready!"; + std::map perfMap; graph->GetPerfData(perfMap); + return perfMap; } -void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) { +InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "GetBlob"); if (!graph || !graph->IsReady()) THROW_IE_EXCEPTION << "Graph is not ready!"; + InferenceEngine::Blob::Ptr data; + InferenceEngine::BlobMap blobs; graph->getInputBlobs(blobs); @@ -223,13 +226,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine auto it = _preProcData.find(name); if (it != _preProcData.end()) { data = it->second->getRoiBlob(); - return; + return data; } if (_inputs.find(name) != _inputs.end()) { data = _inputs[name]; checkBlob(data, name, true); - return; + return data; } InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); @@ -250,7 +253,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine } data = _inputs[name]; checkBlob(data, name, true); - return; + return data; } blobs.clear(); graph->getOutputBlobs(blobs); @@ -258,7 +261,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine if (_outputs.find(name) != _outputs.end()) { data = _outputs[name]; checkBlob(data, name, false); - return; + return data; } InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); @@ -277,14 +280,14 @@ void MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const char *name, InferenceEngine } data = _outputs[name]; checkBlob(data, name, false); - return; + return data; } THROW_IE_EXCEPTION << "Cannot find blob with name: " << name; } -void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) { +void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "SetBlob"); - if (name == nullptr) { + if (name.empty()) { THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name"; } @@ -474,3 +477,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) { std::vector MKLDNNPlugin::MKLDNNInferRequest::QueryState() { return memoryStates; } + +void MKLDNNPlugin::MKLDNNInferRequest::SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest) { + _asyncRequest = asyncRequest; +} + +void MKLDNNPlugin::MKLDNNInferRequest::ThrowIfCanceled() const { + if (_asyncRequest != nullptr) { + _asyncRequest->ThrowIfCanceled(); + } +} \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h 
b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h index 6963bed2ba61be..21d9a3b6dfb8f1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h @@ -13,6 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNExecNetwork; +class MKLDNNAsyncInferRequest; class MKLDNNInferRequest : public InferenceEngine::InferRequestInternal { public: @@ -25,27 +26,26 @@ class MKLDNNInferRequest : public InferenceEngine::InferRequestInternal { void InferImpl() override; - InferenceEngine::StatusCode Cancel() override; + std::map GetPerformanceCounts() const override; - void GetPerformanceCounts(std::map &perfMap) const override; + void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; + + InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + + void SetBatch(int batch = -1) override; + + std::vector QueryState() override; /** - * @brief Given optional implementation of setting blob to avoid need for it to be implemented by plugin - * @param name - a name of input or output blob. - * @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size. + * @brief Sets the pointer to asynchronous inference request that holds this request + * @param[in] asyncRequest Pointer to asynchronous inference request */ - void SetBlob(const char *name, const InferenceEngine::Blob::Ptr &data) override; + void SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest); /** - * @brief Given optional implementation of getting blob to avoid need for it to be implemented by plugin - * @param name - a name of input or output blob. - * @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size. 
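Note: with the signature changes in this header, the CPU plugin request follows the name-based blob interface directly. A rough usage sketch, assuming the updated MKLDNNInferRequest interface from this patch (the blob name "data" and the call site itself are hypothetical):

    void runAndProfile(MKLDNNPlugin::MKLDNNInferRequest& request) {
        InferenceEngine::Blob::Ptr input = request.GetBlob("data");  // blob is returned, no out-parameter
        request.SetBlob("data", input);
        request.InferImpl();
        auto perfCounts = request.GetPerformanceCounts();            // performance map is returned by value
    }
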
+ * @brief If `_asyncRequest` is initialized throw exception with `InferenceEngine::INFER_CANCELLED` status if inference request is canceled */ - void GetBlob(const char *name, InferenceEngine::Blob::Ptr &data) override; - - void SetBatch(int batch = -1) override; - - std::vector QueryState() override; + void ThrowIfCanceled() const; private: void PushInputData(); @@ -60,5 +60,6 @@ class MKLDNNInferRequest : public InferenceEngine::InferRequestInternal { std::map externalPtr; openvino::itt::handle_t profilingTask; std::vector memoryStates; + MKLDNNAsyncInferRequest* _asyncRequest = nullptr; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 3200bfb81e6371..9a93cd89391df9 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -33,12 +33,14 @@ #include #include +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include #include #include #include #include #include +#include #include #include #include @@ -46,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +60,8 @@ #include #include #include +#include +#include #include #include @@ -66,19 +71,23 @@ #include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include -#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) -#if defined(_WIN32) || defined(WIN32) -#include -#include -#else -#include +#include "nodes/mkldnn_mvn_node.h" +#include "nodes/mkldnn_quantize_node.h" -#endif +#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) +# ifdef _WIN32 +# include +# include +# else +# include +# endif #endif using namespace MKLDNNPlugin; @@ -101,8 +110,18 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { ngraph::pass::Manager manager; manager.register_pass(); + + const bool useLpt = + (conf.lpTransformsMode == Config::LPTransformsMode::On) && + ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); + if (useLpt) { + manager.register_pass( + std::vector{ ngraph::element::i8, ngraph::element::u8 }); + } + // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -194,6 +213,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return true; }); + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return MKLDNNMVNNode::checkAxesSuitability(node); + }); + // List of enabled/disabled transformations pass_config->disable(); pass_config->disable(); @@ -207,10 +231,21 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->enable(); + if (useLpt) { + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node); + }); + + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(node); + }); + } + manager.run_passes(nGraphFunc); using namespace ngraph::pass::low_precision; - 
if (conf.lpTransformsMode == Config::LPTransformsMode::On) { + if (useLpt) { + OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); auto params = LayerTransformation::Params( true, // updatePrecisions LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations @@ -227,13 +262,22 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { transformer.transform(nGraphFunc); } + bool has_fake_quantize = ::ngraph::op::util::has_op_with_type(nGraphFunc); + ngraph::pass::Manager legacyManager; + + legacyManager.register_pass(); legacyManager.register_pass(); legacyManager.register_pass(ngraph::element::i64, ngraph::element::i32); // not legacy actually, but it should be the last transformation in the transformation pipeline legacyManager.register_pass(); auto legacyPassConfig = legacyManager.get_pass_config(); + + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + return !MKLDNNQuantizeNode::isNeedToDecompose(node); + }); + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { if (auto mul_op = std::dynamic_pointer_cast(node)) { auto add_op = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(0)); @@ -248,15 +292,16 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return false; }); - legacyManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); + legacyManager.run_passes(nGraphFunc); OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); - clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork)); + clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision"); @@ -352,7 +397,7 @@ Parameter Engine::GetConfig(const std::string& name, const std::map(regs), regs[0]); #else __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]); @@ -381,7 +426,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map(regs), regs[0]); #else __get_cpuid(regs[0], ®s[0], ®s[1], ®s[2], ®s[3]); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/defs.h b/inference-engine/src/mkldnn_plugin/nodes/common/defs.h index bc06c0e7e33b78..c93d1b34b470c8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/defs.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/defs.h @@ -5,29 +5,29 @@ #pragma once #if defined (HAVE_SSE) || defined (HAVE_AVX2) -#if defined (_WIN32) -#include -#else -#include -#endif +# if defined (_WIN32) +# include +# else +# include +# endif #endif -#if defined (WIN32) || defined (_WIN32) -#if defined (__INTEL_COMPILER) -#define DLSDK_EXT_IVDEP() __pragma(ivdep) -#elif defined(_MSC_VER) -#define DLSDK_EXT_IVDEP() __pragma(loop(ivdep)) -#else -#define DLSDK_EXT_IVDEP() -#endif +#ifdef _WIN32 +# if defined (__INTEL_COMPILER) +# define DLSDK_EXT_IVDEP() __pragma(ivdep) +# elif defined(_MSC_VER) +# define DLSDK_EXT_IVDEP() __pragma(loop(ivdep)) +# else +# define DLSDK_EXT_IVDEP() +# endif #elif defined(__linux__) -#if defined(__INTEL_COMPILER) -#define DLSDK_EXT_IVDEP() _Pragma("ivdep") -#elif defined(__GNUC__) -#define DLSDK_EXT_IVDEP() 
_Pragma("GCC ivdep") +# if defined(__INTEL_COMPILER) +# define DLSDK_EXT_IVDEP() _Pragma("ivdep") +# elif defined(__GNUC__) +# define DLSDK_EXT_IVDEP() _Pragma("GCC ivdep") +# else +# define DLSDK_EXT_IVDEP() +# endif #else -#define DLSDK_EXT_IVDEP() +# define DLSDK_EXT_IVDEP() #endif -#else -#define DLSDK_EXT_IVDEP() -#endif \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp index 35f4296a98cc6a..1aa4744b249bdf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp @@ -56,9 +56,11 @@ std::set jit_emitter::get_supported_precisions() { return {InferenceEngine::Precision::FP32}; } -void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const std::vector &pool_vec_idxs, - const std::vector &pool_gpr_idxs) { +void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { using namespace Xbyak::util; + bool is_vec_input = (in_out_type_ == emitter_in_out_map::vec_to_vec) || (in_out_type_ == emitter_in_out_map::vec_to_gpr); + bool is_vec_output = (in_out_type_ == emitter_in_out_map::vec_to_vec) || (in_out_type_ == emitter_in_out_map::gpr_to_vec); for (auto idx : pool_vec_idxs) aux_vec_idxs.push_back(idx); @@ -66,7 +68,10 @@ void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const // For sse41 mask register has to be Xmm(0) if (host_isa_ == cpu::x64::sse41 && aux_vecs_count() > 0) { size_t idx = 0; - assert(std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) == in_vec_idxs.end()); + if (is_vec_input) + assert(std::find(in_idxs.begin(), in_idxs.end(), idx) == in_idxs.end()); + if (is_vec_output) + assert(std::find(out_idxs.begin(), out_idxs.end(), idx) == out_idxs.end()); if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) == aux_vec_idxs.end()) { aux_vec_idxs.push_back(idx); preserved_vec_idxs.push_back(idx); @@ -86,7 +91,12 @@ void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const for (size_t idx = 0; idx < get_max_vecs_count(); idx++) { if (aux_vec_idxs.size() >= aux_vecs_count()) break; - if (std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) != in_vec_idxs.end()) continue; + if (is_vec_input) { + if (std::find(in_idxs.begin(), in_idxs.end(), idx) != in_idxs.end()) continue; + } + if (is_vec_output) { + if (std::find(out_idxs.begin(), out_idxs.end(), idx) != out_idxs.end()) continue; + } if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) continue; if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) continue; @@ -104,6 +114,12 @@ void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const if (aux_gpr_idxs.size() >= aux_gprs_count()) break; if (_idx == Operand::RSP) continue; + if (!is_vec_input) { + if (std::find(in_idxs.begin(), in_idxs.end(), _idx) != in_idxs.end()) continue; + } + if (!is_vec_output) { + if (std::find(out_idxs.begin(), out_idxs.end(), _idx) != out_idxs.end()) continue; + } if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) continue; if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) continue; @@ -113,8 +129,9 @@ void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const assert(aux_gpr_idxs.size() == aux_gprs_count()); if 
(!entry_map_.empty()) { - p_table = Reg64(aux_gpr_idxs[0]); - aux_gpr_idxs.erase(aux_gpr_idxs.begin()); + // last aux_gpr_idx is for p_table, we can use aux_gpr_idxs from idx 0 for other purpose + p_table = Reg64(aux_gpr_idxs[aux_gprs_count() - 1]); + aux_gpr_idxs.erase(aux_gpr_idxs.end() - 1); } for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i) @@ -131,7 +148,6 @@ void jit_emitter::emitter_preamble(const std::vector &in_vec_idxs, const load_table_addr(); } - void jit_emitter::emitter_postamble() { using namespace Xbyak::util; @@ -141,7 +157,7 @@ void jit_emitter::emitter_postamble() { if (preserved_vec_idxs.size()) h->add(h->rsp, preserved_vec_idxs.size() * get_vec_length()); - for (int i = aux_gprs_count() - 1; i >= 0; --i) + for (int i = preserved_gpr_idxs.size() - 1; i >= 0; --i) h->pop(Reg64(preserved_gpr_idxs[i])); preserved_vec_idxs.clear(); @@ -182,11 +198,21 @@ void jit_emitter::prepare_table() { } } -void jit_emitter::emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, +void jit_emitter::emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); + + emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs, nullptr); + + emitter_postamble(); +} + +void jit_emitter::emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::shared_ptr &emit_context, const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { - emitter_preamble(in_vec_idxs, pool_vec_idxs, pool_gpr_idxs); + emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); - emit_impl(in_vec_idxs, out_vec_idxs, pool_vec_idxs, pool_gpr_idxs); + emit_impl(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs, emit_context.get()); emitter_postamble(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h index fe59cc7d73adc2..5f6428c2fa50a6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/emitter.h @@ -12,15 +12,30 @@ namespace MKLDNNPlugin { +enum emitter_in_out_map { + vec_to_vec, + vec_to_gpr, + gpr_to_vec, + gpr_to_gpr, +}; + +struct emitter_context { + virtual ~emitter_context() = default; +}; + class jit_emitter { public: jit_emitter(mkldnn::impl::cpu::x64::jit_generator* host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32) - : h(host), host_isa_(host_isa), n(node), exec_prc_(exec_prc) { + InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_vec) + : h(host), host_isa_(host_isa), n(node), exec_prc_(exec_prc), in_out_type_(in_out_type) { k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well } - virtual void emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + virtual void emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}); + + virtual void emit(const std::vector &in_idxs, const std::vector &out_idxs, + const std::shared_ptr &emit_context, const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}); virtual void emit_table(); virtual size_t get_inputs_num() = 0; @@ -72,13 +87,16 @@ class jit_emitter { _cmp_gt_os = 
mkldnn::impl::cpu::x64::jit_generator::_cmp_nle_us, }; - virtual void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) {} + virtual void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) = 0; - virtual void emitter_preamble(const std::vector &in_vec_idxs, const std::vector &pool_vec_idxs, - const std::vector &pool_gpr_idxs); + virtual void emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs); virtual void emitter_postamble(); + emitter_in_out_map in_out_type_; + std::vector aux_vec_idxs; std::vector aux_gpr_idxs; diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp new file mode 100644 index 00000000000000..75850b92b79eea --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.cpp @@ -0,0 +1,878 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "emitter.h" +#include "jit_load_store_emitters.h" +#include "legacy/ie_layers.h" +#include +#include "utils/bfloat16.hpp" + +using namespace InferenceEngine; +using namespace mkldnn::impl; +using namespace mkldnn::impl::utils; +using namespace mkldnn::impl::cpu; +using namespace mkldnn::impl::cpu::x64; +using namespace Xbyak; +using namespace Xbyak::util; + +namespace MKLDNNPlugin { + +/// LOAD /// +jit_load_emitter::jit_load_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, + Precision exec_prc, emitter_in_out_map in_out_type) +: jit_emitter(host, host_isa, node, exec_prc, in_out_type) { + prepare_table(); + v_len_elt = get_vec_length() / exec_prc.size(); +} + +size_t jit_load_emitter::get_inputs_num() { return 1; } + +// 0 for temp reg for mask load, 1 for table address +size_t jit_load_emitter::aux_gprs_count() const { + return 2; +} + +void jit_load_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { + const auto* load_emitter_context = dynamic_cast(emit_context); + if (load_emitter_context == nullptr) { + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " does not get load emmiter context."; + } + + if (host_isa_ == cpu::x64::sse41) { + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), + load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); + } else if (host_isa_ == cpu::x64::avx2) { + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), + load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); + } else if (host_isa_ == cpu::x64::avx512_common) { + emit_isa(Reg64(in_idxs[0]), load_emitter_context->offset_byte_, load_emitter_context->src_prc_, static_cast(out_idxs[0]), + load_emitter_context->dst_prc_, load_emitter_context->load_num_, load_emitter_context->is_fill_, load_emitter_context->fill_value_); + } else { + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " is 
performed on unsupported isa(at least x64::sse41)."; + } +} + +template +void jit_load_emitter::emit_isa(const Xbyak::Reg64 ®_src, int offset_byte, InferenceEngine::Precision src_prc, + const int out_vec_idx, InferenceEngine::Precision dst_prc, int load_num, bool is_fill, std::string fill_value) const { + bool matched_prc = (dst_prc == src_prc) || (dst_prc == Precision::FP32) || (dst_prc == Precision::I32); + if (!matched_prc) { + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " only support output precision of FP32 or I32 or the same precision as input."; + } + if (load_num > (get_vec_length() / dst_prc.size())) { + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " have unexpected number of elements to load."; + } + + using Vmm = typename conditional3::type; + + // pure load + if (src_prc == dst_prc) { + load_bytes(Vmm(out_vec_idx), reg_src, offset_byte, load_num * src_prc.size(), is_fill, fill_value); + } else { + // "pure load" + convert. dst_prc must be FP32 or I32. + switch (src_prc) { + case Precision::FP32: + case Precision::I32: + load_bytes(Vmm(out_vec_idx), reg_src, offset_byte, load_num * src_prc.size(), is_fill, fill_value); + break; + case Precision::I8: + load_bytes_to_dword_extension(Vmm(out_vec_idx), reg_src, offset_byte, true, load_num * src_prc.size(), is_fill, fill_value); + break; + case Precision::U8: + load_bytes_to_dword_extension(Vmm(out_vec_idx), reg_src, offset_byte, false, load_num * src_prc.size(), is_fill, fill_value); + break; + case Precision::I16: + load_words_to_dword_extension(Vmm(out_vec_idx), reg_src, offset_byte, false, true, load_num * src_prc.size(), is_fill, fill_value); + break; + case Precision::U16: + load_words_to_dword_extension(Vmm(out_vec_idx), reg_src, offset_byte, false, false, load_num * src_prc.size(), is_fill, fill_value); + break; + case Precision::BF16: + load_words_to_dword_extension(Vmm(out_vec_idx), reg_src, offset_byte, true, false, load_num * src_prc.size(), is_fill, fill_value); + break; + default: + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unsupported src precision to load."; + } + } + + // post convert between I32 and FP32 + if (src_prc != dst_prc) { + switch (dst_prc) { + case Precision::FP32: + if ((src_prc != Precision::FP32) && (src_prc != Precision::BF16)) + h->uni_vcvtdq2ps(Vmm(out_vec_idx), Vmm(out_vec_idx)); + break; + case Precision::I32: + if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) + h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx)); + break; + default: + break; + } + } +} + +/** +* load_bytes is the utility function to facilitate loading of +* load_size (0 <= load_size <= 64) many contiguous bytes into the Xmm/Ymm/Zmm +* register from the memory referenced by ptr[reg + offset] address. 
+* +* Functionally, invocation of load_bytes is equivalent to +* the following loop: +* +* for (int idx = 0; idx < load_size; ++idx) +* vpinsrb(vmm, vmm, ptr[reg + offset + idx], idx); +* +*/ +template +void jit_load_emitter::load_bytes(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, int load_size, + bool is_fill, std::string fill_value) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + MAYBE_UNUSED(is_xmm); + MAYBE_UNUSED(is_ymm); + MAYBE_UNUSED(is_zmm); + + // Ensure data fits completely inside the Xmm/Ymm/Zmm register + if (load_size < 0 || load_size > 64) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load in load_byte."; + // check if proper number bytes fit inside the Xmm/Ymm register + if (is_ymm && load_size > 32) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to ymm in load_byte."; + if (is_xmm && load_size > 16) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to xmm in load_byte."; + + auto xmm = Xbyak::Xmm(vmm.getIdx()); + auto ymm = Xbyak::Ymm(vmm.getIdx()); + auto zmm = Xbyak::Zmm(vmm.getIdx()); + + // addr(i) denotes the memory pointed by ptr[reg + offset + (i bytes)] + const auto addr = [&](int bytes_offset) { + return ptr[reg + offset + bytes_offset * sizeof(int8_t)]; + }; + + if (is_zmm && (load_size != 64) && mayiuse(cpu::x64::avx512_core)) { + uint64_t mask = 1; + mask = (mask << load_size) - mask; + h->mov(Reg64(aux_gpr_idxs[0]), mask); + h->kmovq(k_mask, Reg64(aux_gpr_idxs[0])); + h->vmovdqu8(zmm | k_mask | T_z, addr(0)); + } else { + if (load_size == 64) { + h->uni_vmovdqu(zmm, addr(0)); + } else if (load_size == 32) { + h->uni_vmovdqu(ymm, addr(0)); + } else if (load_size == 16) { + h->uni_vmovdqu(xmm, addr(0)); + } else { + int start_bytes = 0; + int bytes_to_load = load_size; + + bool has_ymm_block = false; + if (bytes_to_load > 32) { + // Prepare to insert to upper bits of zmm + start_bytes += 32; + bytes_to_load -= 32; + has_ymm_block = true; + } + + bool has_xmm_block = false; + if (bytes_to_load > 16) { + // Prepare to insert to upper bits of ymm + start_bytes += 16; + bytes_to_load -= 16; + has_xmm_block = true; + } + + if (bytes_to_load >= 8 && bytes_to_load < 16) + h->pinsrq(xmm, addr(start_bytes), 0); + else if (bytes_to_load == 16) + h->uni_vmovdqu(xmm, addr(start_bytes)); + + switch (bytes_to_load) { + case 0: break; + case 1: h->uni_vpinsrb(xmm, xmm, addr(start_bytes), 0); break; + case 2: h->uni_vpinsrw(xmm, xmm, addr(start_bytes), 0); break; + case 3: + h->uni_vpinsrw(xmm, xmm, addr(start_bytes), 0); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 2), 2); + break; + case 4: h->pinsrd(xmm, addr(start_bytes), 0); break; + case 5: + h->pinsrd(xmm, addr(start_bytes), 0); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 4), 4); + break; + case 6: + h->pinsrd(xmm, addr(start_bytes), 0); + h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 4), 2); + break; + case 7: + h->pinsrd(xmm, addr(start_bytes), 0); + h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 4), 2); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 6), 6); + break; + case 8: break; + case 9: h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 8), 8); break; + case 10: h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 8), 4); break; + case 11: + h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 8), 4); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 10), 10); + break; + 
case 12: h->pinsrd(xmm, addr(start_bytes + 8), 2); break; + case 13: + h->pinsrd(xmm, addr(start_bytes + 8), 2); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 12), 12); + break; + case 14: + h->pinsrd(xmm, addr(start_bytes + 8), 2); + h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 12), 6); + break; + case 15: + h->pinsrd(xmm, addr(start_bytes + 8), 2); + h->uni_vpinsrw(xmm, xmm, addr(start_bytes + 12), 6); + h->uni_vpinsrb(xmm, xmm, addr(start_bytes + 14), 14); + break; + case 16: break; + default: + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load in load_byte."; + } + + if (has_xmm_block) { + h->vinsertf128(ymm, ymm, xmm, 1); // insert to upper bits of ymm + if (has_ymm_block) + h->vinsertf128(ymm, ymm, addr(32), 0); // insert to lower bits of ymm + else + h->vinsertf128(ymm, ymm, addr(0), 0); // insert to lower bits of ymm + } + + if (has_ymm_block) { + h->vinsertf64x4(zmm, zmm, ymm, 1); // insert to upper bits of zmm + h->vinsertf64x4(zmm, zmm, addr(0), 0); // insert to lower bits of zmm + } + } + } + + if (is_fill) + fill_with_default(vmm, fill_value, load_size / 4); +} + +/** +* load_bytes_to_dword_extension is the utility function to facilitate +* loading of load_size (0 <= load_size <= 16) many contiguous bytes in +* the xmm register from the memory referenced by ptr[reg + offset] +* address and then do signed/zero extension of those to double words. +* +* Functionally, invocation of load_bytes_to_dword_extension is equivalent +* to the following: +* +* for (int idx = 0; idx < load_size; ++idx) +* vpinsrb(vmm, vmm, ptr[reg + offset + idx], idx); +* if (is_signed) vpmovsxbd(vmm, vmm); else vpmovzxbd(vmm, vmm); +* +* Valid values for the load_size variable are: +* [0..4] for XMM version of the function, i.e. 4 bytes -> 4 * 32 bit == 128 bit +* [0..8] for YMM version of the function. i.e. 8 bytes -> 8 * 32 bit == 256 bit +* [0..16] for ZMM version of the function. i.e. 
16 bytes -> 16 * 32 bit == 512 bit +*/ + +template +void jit_load_emitter::load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 ®, + int offset, bool is_signed, int load_size, bool is_fill, std::string fill_value) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + MAYBE_UNUSED(is_xmm); + MAYBE_UNUSED(is_ymm); + MAYBE_UNUSED(is_zmm); + + // Ensure extended double words fit inside Zmm (32 * load_size <= 512) + // For Ymm register, load capacity is halved (32 * load_size <= 256) + // For Xmm register, load capacity is halved further (32 * load_size <= 128) + if (load_size < 0 || load_size > 16) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load in load_bytes_to_dword_extension."; + if (is_ymm && load_size > 8) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to ymm in load_bytes_to_dword_extension."; + if (is_xmm && load_size > 4) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to xmm in load_bytes_to_dword_extension."; + + // For load_size == 4/8/16, do load/extension in one go + if (load_size == 16) { + const auto zmm = Xbyak::Zmm(vmm.getIdx()); + if (is_signed) + h->uni_vpmovsxbd(zmm, ptr[reg + offset]); + else + h->uni_vpmovzxbd(zmm, ptr[reg + offset]); + } else if (load_size == 8) { + // full size of ymm or ymm_block of zmm + const auto ymm = Xbyak::Ymm(vmm.getIdx()); + if (is_signed) + h->uni_vpmovsxbd(ymm, ptr[reg + offset]); + else + h->uni_vpmovzxbd(ymm, ptr[reg + offset]); + } else if (load_size == 4) { + // full size of xmm or xmm_block of ymm/zmm + const auto xmm = Xbyak::Xmm(vmm.getIdx()); + if (is_signed) + h->uni_vpmovsxbd(xmm, ptr[reg + offset]); + else + h->uni_vpmovzxbd(xmm, ptr[reg + offset]); + } else { + // tails process + if (is_zmm) { + unsigned int mask = 1; + mask = (mask << load_size) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + if (is_signed) + h->uni_vpmovsxbd(vmm | k_mask | T_z, ptr[reg + offset]); + else + h->uni_vpmovzxbd(vmm | k_mask | T_z, ptr[reg + offset]); + } else { + const auto xmm = Xbyak::Xmm(vmm.getIdx()); + load_bytes(xmm, reg, offset, load_size); + if (is_signed) + h->uni_vpmovsxbd(vmm, xmm); + else + h->uni_vpmovzxbd(vmm, xmm); + } + } + + if (is_fill) + fill_with_default(vmm, fill_value, load_size); +} + +/** +* load_words_to_dword_extension is the utility function to facilitate +* loading of load_size (0 <= load_size <= 32) byte many contiguous words(num == load_size / 2) +* in the Vmm register from the memory referenced by ptr[reg + offset] +* address and then do signed/zero extension of those to double words. +* +* Functionally, invocation of load_words_to_dword_extension is equivalent +* to the following extended pseudo code: +* +* for (int idx = 0; idx < load_size / 2; ++idx) +* vpinsrw(vmm, vmm, ptr[reg + offset + 2 * idx], idx); +* if (is_signed) vpmovsxwd(vmm, vmm); else vpmovzxwd(vmm, vmm); +* +* Valid values for the load_size variable are: +* [0..8] for XMM version of the function. i.e. 4 words -> 4 * 32 bit == 128 bit +* [0..16] for YMM version of the function. i.e. 8 words -> 8 * 32 bit == 256 bit +* [0.. 32] for ZMM version of the function. i.e. 
16 words -> 16 * 32 bit == 512 bit +*/ +template +void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 ®, + int offset, bool is_bf16, bool is_signed, int load_size, bool is_fill, std::string fill_value) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + MAYBE_UNUSED(is_xmm); + MAYBE_UNUSED(is_ymm); + MAYBE_UNUSED(is_zmm); + + // Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512) + // For Ymm register, load capacity is halved (16/2(num) * 32 <= 128) + // For Xmm register, load capacity is halved again (8/2(num) * 32 <= 128) + if (load_size < 0 || load_size > 32) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load in load_words_to_dword_extension."; + if (is_ymm && load_size > 16) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to ymm in load_words_to_dword_extension."; + if (is_xmm && load_size > 8) + THROW_IE_EXCEPTION << "Load emitter in " << n->getName() << " has unexpected number of values to load to xmm in load_words_to_dword_extension."; + + auto xmm = Xbyak::Xmm(vmm.getIdx()); + auto ymm = Xbyak::Ymm(vmm.getIdx()); + auto zmm = Xbyak::Zmm(vmm.getIdx()); + + // For load_size == 32/16/8, do load/extension in one go + // including xmm/ymm tail block for ymm/zmm, so explicite xmm/ymm/zmm + if (load_size == 32) { + if (is_bf16) { + h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + h->uni_vpslld(zmm, zmm, 16); + } else { + if (is_signed) + h->uni_vpmovsxwd(zmm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + } + } else if (load_size == 16) { + if (is_bf16) { + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + h->uni_vpslld(ymm, ymm, 16); + } else { + if (is_signed) + h->uni_vpmovsxwd(ymm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + } + } else if (load_size == 8) { + if (is_bf16) { + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + h->uni_vpslld(xmm, xmm, 16); + } else { + if (is_signed) + h->uni_vpmovsxwd(xmm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + } + } else { + if (is_zmm) { + unsigned int mask = 1; + mask = (mask << (load_size / 2)) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + if (is_bf16) { + h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + h->uni_vpslld(vmm, vmm, 16); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); + else + h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + } + } else { + // xmm or ymm version + load_bytes(xmm, reg, offset, load_size); + if (is_bf16) { + h->uni_vpmovzxwd(vmm, xmm); + h->uni_vpslld(vmm, vmm, 16); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm, xmm); + else + h->uni_vpmovzxwd(vmm, xmm); + } + } + } + + if (is_fill) + fill_with_default(vmm, fill_value, load_size / 2); +} + +template + void jit_load_emitter::fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + if (is_xmm || is_ymm) { + uint8 imm = 1; + imm = ~((imm << load_num) - imm); // shift load_num bit + if (is_xmm) + h->blendps(vmm, table_val(fill_value), imm); + else + h->vblendps(vmm, vmm, table_val(fill_value), imm); + } else if (is_zmm) { + uint64_t tail_mask = 1; + tail_mask = 
~((tail_mask << load_num) - tail_mask); + h->mov(Reg64(aux_gpr_idxs[0]), tail_mask); + h->kmovq(k_mask, Reg64(aux_gpr_idxs[0])); + h->vblendmps(vmm | k_mask, vmm, table_val(fill_value)); + } + } + +void jit_load_emitter::register_table_entries() { + push_arg_entry_of("zero", 0x00000000, true); + push_arg_entry_of("int_one", 0x00000001, true); + push_arg_entry_of("float_one", 0x3f800000, true); + push_arg_entry_of("int32_min", 0xcf000000, true); + push_arg_entry_of("float_min", 0xff7fffff, true); + push_arg_entry_of("int32_max", 0x4effffff, true); + push_arg_entry_of("float_max", 0x7f7fffff, true); +} + +/// STORE /// +jit_store_emitter::jit_store_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, + Precision exec_prc, emitter_in_out_map in_out_type) +: jit_emitter(host, host_isa, node, exec_prc, in_out_type) { + v_len_elt = get_vec_length() / exec_prc.size(); + if (!mayiuse(cpu::x64::avx512_core_bf16) && mayiuse(cpu::x64::avx512_core)) { + emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(host, host_isa, nullptr)); + } +} + +// 0 for temp reg for mask store +size_t jit_store_emitter::aux_gprs_count() const { + return 1; +} + +// zero value, zeroed and passed from caller from performance standpoint(zeroed one time and not need preserve and restore status) +size_t jit_store_emitter::aux_vecs_count() const { + return 1; +} + +size_t jit_store_emitter::get_inputs_num() { return 1; } + +void jit_store_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { + const auto* store_emitter_context = dynamic_cast(emit_context); + if (store_emitter_context == nullptr) { + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " does not get store emmiter context."; + } + if (host_isa_ == cpu::x64::sse41) { + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), + store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); + } else if (host_isa_ == cpu::x64::avx2) { + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), + store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); + } else if (host_isa_ == cpu::x64::avx512_common) { + emit_isa(static_cast(in_idxs[0]), store_emitter_context->src_prc_, Reg64(out_idxs[0]), + store_emitter_context->offset_byte_, store_emitter_context->dst_prc_, store_emitter_context->store_num_); + } else { + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " is performed on unsupported isa(at least x64::sse41)."; + } +} + +template + void jit_store_emitter::emit_isa(const int in_vec_idx, InferenceEngine::Precision src_prc, + const Xbyak::Reg64 ®_dst, int offset_byte, InferenceEngine::Precision dst_prc, int store_num) const { + bool matched_prc = (src_prc == dst_prc) || (src_prc == Precision::FP32) || (src_prc == Precision::I32); + if (!matched_prc) { + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " only support input precision of FP32 or I32 or the same precision as output."; + } + if ((src_prc == Precision::FP32) || (src_prc == Precision::I32)) { + if ((isa == cpu::x64::sse41 && store_num > 4) || (isa == cpu::x64::avx2 && store_num > 8) || + (isa == cpu::x64::avx512_common && store_num > 16) || store_num < 0) { + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store."; + } + } + + 
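// The tail-mask arithmetic above reduces to two complementary bit masks; this standalone sketch
// (helper names are illustrative, assuming 0 < loaded < 64) shows them next to a scalar model of
// the blend performed by fill_with_default on the lanes that were not loaded.
#include <cstdint>

inline uint64_t loaded_lanes_mask(unsigned loaded) { return (uint64_t(1) << loaded) - 1; } // lanes holding real data
inline uint64_t fill_lanes_mask(unsigned loaded)   { return ~loaded_lanes_mask(loaded); }  // lanes to overwrite with fill_value

// Scalar equivalent of vblendps/vblendmps with the fill mask over a register of `lanes` dwords.
inline void fill_tail_ref(float *vec, unsigned lanes, unsigned loaded, float fill_value) {
    const uint64_t mask = fill_lanes_mask(loaded);
    for (unsigned i = 0; i < lanes; ++i)
        if (mask & (uint64_t(1) << i))
            vec[i] = fill_value;
}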
using Vmm = typename conditional3::type; + + if (src_prc != dst_prc) { + switch (src_prc) { + case Precision::FP32: + if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) + h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx)); + break; + case Precision::I32: + if ((dst_prc == Precision::FP32) || (dst_prc == Precision::BF16)) + h->uni_vcvtdq2ps(Vmm(in_vec_idx), Vmm(in_vec_idx)); + break; + default: + break; + } + } + + if (src_prc == dst_prc) { + store_bytes(Vmm(in_vec_idx), reg_dst, offset_byte, store_num * dst_prc.size()); + } else { + switch (dst_prc) { + case Precision::FP32: + case Precision::I32: + store_bytes(Vmm(in_vec_idx), reg_dst, offset_byte, store_num * dst_prc.size()); + break; + case Precision::I8: + store_dword_to_byte_extension(Vmm(in_vec_idx), reg_dst, offset_byte, true, store_num); + break; + case Precision::U8: + store_dword_to_byte_extension(Vmm(in_vec_idx), reg_dst, offset_byte, false, store_num); + break; + case Precision::I16: + store_dword_to_word_extension(Vmm(in_vec_idx), reg_dst, offset_byte, false, true, store_num); + break; + case Precision::U16: + store_dword_to_word_extension(Vmm(in_vec_idx), reg_dst, offset_byte, false, false, store_num); + break; + case Precision::BF16: + store_dword_to_word_extension(Vmm(in_vec_idx), reg_dst, offset_byte, true, false, store_num); + break; + default: + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unsupported dst precision to store."; + } + } + } + +/** +* store_bytes is the utility function to facilitate storing of +* store_size (0 <= store_size <= 64) many contiguous bytes from the Xmm/Ymm/Zmm +* register into the memory referenced by ptr[reg + offset] address. +* +* Additionally, when store_size > 16, the input Ymm register will not be +* preserved due to the usage of vextracti128 instruction. 
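// A scalar sketch of the ordering used by the store path above, assuming an FP32 -> I32 pair:
// the FP32 <-> I32 conversion happens in the register first, so store_bytes and the narrowing
// helpers only ever see dwords. The function name is illustrative; uni_vcvtps2dq rounds to
// nearest-even under the default MXCSR state, which std::nearbyintf reproduces here for values
// within int32 range.
#include <cstdint>
#include <cmath>

inline void store_fp32_as_i32_ref(const float *src, int32_t *dst, int store_num) {
    for (int i = 0; i < store_num; ++i)
        dst[i] = static_cast<int32_t>(std::nearbyintf(src[i])); // uni_vcvtps2dq, then a plain store
}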
+* +* Functionally, invocation of store_bytes is equivalent +* to the following loop: +* +* for (int idx = 0; idx < store_size; ++idx) +* vpextrb(ptr[reg + offset + idx], vmm, idx); +* +*/ +template + void jit_store_emitter::store_bytes(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, int store_size) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + MAYBE_UNUSED(is_xmm); + MAYBE_UNUSED(is_ymm); + MAYBE_UNUSED(is_zmm); + + // Ensure data fits completely inside the Xmm/Ymm/Zmm register + if (store_size < 0 || store_size > 64) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store in store_bytes."; + if (is_ymm && store_size > 32) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to ymm in store_bytes."; + if (is_xmm && store_size > 16) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to xmm in store_bytes."; + + auto xmm = Xbyak::Xmm(vmm.getIdx()); + auto ymm = Xbyak::Ymm(vmm.getIdx()); + auto zmm = Xbyak::Zmm(vmm.getIdx()); + + const auto addr = [&](int bytes_offset) { + return ptr[reg + offset + bytes_offset * sizeof(int8_t)]; + }; + + if (is_zmm && (store_size != 64) && mayiuse(cpu::x64::avx512_core)) { + uint64_t mask = 1; + mask = (mask << store_size) - mask; + h->mov(Reg64(aux_gpr_idxs[0]), mask); + h->kmovq(k_mask, Reg64(aux_gpr_idxs[0])); + h->vmovdqu8(addr(0) | k_mask, zmm); + } else { + if (store_size == 64) { + h->uni_vmovdqu(addr(0), zmm); + return; + } else if (store_size == 32) { + h->uni_vmovdqu(addr(0), ymm); + return; + } else if (store_size == 16) { + h->uni_vmovdqu(addr(0), xmm); + return; + } else { + int start_bytes = 0; + int bytes_to_store = store_size; + + if (store_size > 32) { + h->uni_vmovdqu(addr(0), ymm); // store lower bits from zmm + start_bytes += 32; + bytes_to_store -= 32; + h->vextractf64x4(ymm, zmm, 1); // load upper bits from zmm into ymm + } + + if (bytes_to_store > 16) { + h->uni_vmovdqu(addr(start_bytes), xmm); // store lower bits from ymm + start_bytes += 16; + bytes_to_store -= 16; + h->vextractf128(xmm, ymm, 1); // load upper bits from ymm into xmm + } + + if (bytes_to_store >= 8 && bytes_to_store < 16) + h->pextrq(addr(start_bytes), xmm, 0); + else if (bytes_to_store == 16) + h->uni_vmovdqu(addr(start_bytes), xmm); + + // 64/32/16/8 with one go + // tail 7 bytes for lower or upper xmm + switch (bytes_to_store) { + case 0: break; + case 1: h->uni_vpextrb(addr(start_bytes), xmm, 0); break; + case 2: h->uni_vpextrw(addr(start_bytes), xmm, 0); break; + case 3: + h->uni_vpextrw(addr(start_bytes), xmm, 0); + h->uni_vpextrb(addr(start_bytes + 2), xmm, 2); + break; + case 4: h->pextrd(addr(start_bytes), xmm, 0); break; + case 5: + h->pextrd(addr(start_bytes), xmm, 0); + h->uni_vpextrb(addr(start_bytes + 4), xmm, 4); + break; + case 6: + h->pextrd(addr(start_bytes), xmm, 0); + h->uni_vpextrw(addr(start_bytes + 4), xmm, 2); + break; + case 7: + h->pextrd(addr(start_bytes), xmm, 0); + h->uni_vpextrw(addr(start_bytes + 4), xmm, 2); + h->uni_vpextrb(addr(start_bytes + 6), xmm, 6); + break; + case 8: break; + case 9: h->uni_vpextrb(addr(start_bytes + 8), xmm, 8); break; + case 10: h->uni_vpextrw(addr(start_bytes + 8), xmm, 4); break; + case 11: + h->uni_vpextrw(addr(start_bytes + 8), xmm, 4); + h->uni_vpextrb(addr(start_bytes + 10), xmm, 10); + break; + case 12: h->pextrd(addr(start_bytes + 8), xmm, 
2); break; + case 13: + h->pextrd(addr(start_bytes + 8), xmm, 2); + h->uni_vpextrb(addr(start_bytes + 12), xmm, 12); + break; + case 14: + h->pextrd(addr(start_bytes + 8), xmm, 2); + h->uni_vpextrw(addr(start_bytes + 12), xmm, 6); + break; + case 15: + h->pextrd(addr(start_bytes + 8), xmm, 2); + h->uni_vpextrw(addr(start_bytes + 12), xmm, 6); + h->uni_vpextrb(addr(start_bytes + 14), xmm, 14); + break; + case 16: break; + default: + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store in store_bytes."; + } + } + } + } + +/** +* store_dword_to_byte_extension is the utility function to +* 1. convert store_num (0 <= store_num <= 16) dwords in the Xmm/Ymm/Zmm to store_num bytes with singed or unsinged saturation. +* 2. store the packed byte into the memory referenced by ptr[reg + offset] address. +*/ +template + void jit_store_emitter::store_dword_to_byte_extension(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, bool is_signed, int store_num) const { + constexpr bool is_xmm = std::is_same::value; + constexpr bool is_ymm = std::is_same::value; + constexpr bool is_zmm = std::is_same::value; + + MAYBE_UNUSED(is_xmm); + MAYBE_UNUSED(is_ymm); + MAYBE_UNUSED(is_zmm); + + // Ensure data fits completely inside the Xmm/Ymm/Zmm register + // At most 8 dwords can fit inside the Ymm register + // At most 4 dwords can fit inside the Xmm register + if (store_num < 0 || store_num > 16) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store in store_dword_to_byte_extension."; + if (is_ymm && store_num > 8) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to ymm in store_dword_to_byte_extension."; + if (is_xmm && store_num > 4) + THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to xmm in store_dword_to_byte_extension."; + + auto ymm = Xbyak::Ymm(vmm.getIdx()); + + const auto addr = [&](int bytes_offset) { + return ptr[reg + offset + bytes_offset * sizeof(int8_t)]; + }; + + if (is_zmm) { + if (store_num == 16) { // v_len_elt(16) + if (is_signed) { + h->vpmovsdb(addr(0), vmm); + } else { + h->vpmaxsd(vmm, vmm, Vmm(aux_vec_idxs[0])); + h->vpmovusdb(addr(0), vmm); + } + } else { + unsigned int mask = 1; + mask = (mask << store_num) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + if (is_signed) { + h->vpmovsdb(addr(0) | k_mask, vmm); + } else { + h->vpmaxsd(vmm, vmm, Vmm(aux_vec_idxs[0])); + h->vpmovusdb(addr(0) | k_mask, vmm); + } + } + } else { + // db only available on avx512, need dw+wb to emulate + if (is_signed) + h->uni_vpackssdw(vmm, vmm, vmm); + else + h->uni_vpackusdw(vmm, vmm, vmm); + // gather 2(cross lane) 64 bits into lower vmm to store + // [y_3 y_2 y_1 y_0] |--> [y_0 y_0 y_2 y_0] + if (is_ymm) { + h->vpermq(ymm, ymm, 0x08); // 00001000 + } + + if (is_signed) + h->uni_vpacksswb(vmm, vmm, vmm); + else + h->uni_vpackuswb(vmm, vmm, vmm); + + store_bytes(vmm, reg, offset, store_num); + } + } + +/** +* store_dword_to_word_extension is the utility function to +* 1. convert store_num (0 <= store_num <= 16) dwords in the Xmm/Ymm/Zmm to store_num words with singed or unsinged saturation. +* 2. store the packed words into the memory referenced by ptr[reg + offset] address. 
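// The byte saturation described above has a simple scalar model (shown with C++17's std::clamp;
// names are illustrative): the signed path clamps to [-128, 127] as vpmovsdb does, while the
// unsigned path first forces negatives to zero (vpmaxsd against the zeroed aux register) and then
// clamps to [0, 255] as vpmovusdb does. Off AVX-512 the emitter emulates this with the pack
// instructions plus a vpermq lane repack, as noted in the code.
#include <cstdint>
#include <algorithm>

inline int8_t  saturate_i32_to_i8(int32_t v) { return static_cast<int8_t>(std::clamp(v, -128, 127)); }
inline uint8_t saturate_i32_to_u8(int32_t v) { return static_cast<uint8_t>(std::clamp(v, 0, 255)); }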
+*/
+template <typename Vmm>
+    void jit_store_emitter::store_dword_to_word_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset,
+        bool is_bf16, bool is_signed, int store_num) const {
+        constexpr bool is_xmm = std::is_same<Vmm, Xbyak::Xmm>::value;
+        constexpr bool is_ymm = std::is_same<Vmm, Xbyak::Ymm>::value;
+        constexpr bool is_zmm = std::is_same<Vmm, Xbyak::Zmm>::value;
+
+        MAYBE_UNUSED(is_xmm);
+        MAYBE_UNUSED(is_ymm);
+        MAYBE_UNUSED(is_zmm);
+
+        // Ensure data fits completely inside the Xmm/Ymm/Zmm register
+        // At most 4 dwords can fit inside the Xmm register
+        // At most 8 dwords can fit inside the Ymm register
+        if (store_num < 0 || store_num > 16)
+            THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store in store_dword_to_word_extension.";
+        if (is_ymm && store_num > 8)
+            THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to ymm in store_dword_to_word_extension.";
+        if (is_xmm && store_num > 4)
+            THROW_IE_EXCEPTION << "Store emitter in " << n->getName() << " has unexpected number of values to store to xmm in store_dword_to_word_extension.";
+
+        auto ymm = Xbyak::Ymm(vmm.getIdx());
+        auto zmm = Xbyak::Zmm(vmm.getIdx());
+
+        if (is_bf16) {
+            if (mayiuse(cpu::x64::avx512_core_bf16)) {
+                h->vcvtneps2bf16(ymm, zmm);
+            } else {
+                emu_vcvtneps2bf16->emit({static_cast<size_t>(vmm.getIdx())}, {static_cast<size_t>(ymm.getIdx())});
+            }
+            if (store_num == 16) {
+                h->vmovdqu16(ptr[reg + offset], ymm);
+            } else {
+                store_bytes(ymm, reg, offset, store_num * 2);
+            }
+        } else {
+            if (is_zmm) {
+                if (store_num == 16) { // v_len_elt
+                    if (is_signed) {
+                        h->vpmovsdw(ptr[reg + offset], vmm); // signed int32 saturates to signed int16.
+                    } else {
+                        h->vmaxsd(vmm, Vmm(aux_vec_idxs[0]), vmm); // if the sign bit is 1, set the value to 0.
+                        h->vpmovusdw(ptr[reg + offset], vmm); // unsigned int32 saturates to unsigned int16.
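// BF16 handling in this pair of emitters is asymmetric; the scalar sketch below (illustrative
// helper names, NaN handling omitted) mirrors it: loading widens a bf16 value by placing its bits
// in the upper half of an FP32 word (uni_vpmovzxwd followed by uni_vpslld by 16), while storing
// rounds an FP32 value to the nearest bf16, which is the rounding vcvtneps2bf16 applies and the
// jit_emu_vcvtneps2bf16 fallback is expected to match.
#include <cstdint>
#include <cstring>

inline float bf16_to_fp32_ref(uint16_t b) {
    uint32_t bits = static_cast<uint32_t>(b) << 16;   // bf16 payload becomes the high half of FP32
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

inline uint16_t fp32_to_bf16_ref(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const uint32_t rounding = 0x7FFFu + ((bits >> 16) & 1u); // round to nearest, ties to even
    return static_cast<uint16_t>((bits + rounding) >> 16);
}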
+ } + } else { + unsigned int mask = 1; + mask = (mask << store_num) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + if (is_signed) { + h->vpmovsdw(ptr[reg + offset] | k_mask, vmm); + } else { + h->vmaxsd(vmm, Vmm(aux_vec_idxs[0]), vmm); + h->vpmovusdw(ptr[reg + offset] | k_mask, vmm); + } + } + } else { + // direct mov_dw available only on avx512, emulate with pack_dw + permute + pure store + if (is_signed) + h->uni_vpackssdw(vmm, vmm, vmm); + else + h->uni_vpackusdw(vmm, vmm, vmm); + // gather 2/4(cross lane) 64 bits into lower vmm to store + // [y_3 y_2 y_1 y_0] |--> [y_0 y_0 y_2 y_0] + // [ 128 | 128 ] |--> [ 128 | 128 ] + if (is_ymm) { + h->vpermq(ymm, ymm, 0x08); // 00001000 + } + + store_bytes(vmm, reg, offset, store_num * 2); + } + } + } + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h new file mode 100644 index 00000000000000..332d54903e5b99 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/common/jit_load_store_emitters.h @@ -0,0 +1,151 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "emitter.h" +#include +#include "mkldnn_node.h" +#include "utils/bfloat16.hpp" + +using namespace mkldnn::impl; +using namespace mkldnn::impl::cpu::x64; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { +struct load_emitter_context : public emitter_context { + load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8), + offset_byte_(0), is_fill_(false), fill_value_("zero") {} + + load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero", int offset_byte = 0): + src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value), offset_byte_(offset_byte) {} + + int offset_byte_; + int load_num_; + Precision src_prc_; + Precision dst_prc_; + bool is_fill_; + std::string fill_value_; +}; + +struct store_emitter_context : public emitter_context { + store_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), + store_num_(8), offset_byte_(0) {} + + store_emitter_context(Precision src_prc, Precision dst_prc, int store_num, int offset_byte = 0) + : src_prc_(src_prc), dst_prc_(dst_prc), store_num_(store_num), offset_byte_(offset_byte) {} + + int offset_byte_; + int store_num_; + Precision src_prc_; + Precision dst_prc_; +}; + +class jit_load_emitter : public jit_emitter { +public: + jit_load_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node, + InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec); + /** + * load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc. + * is_fill: when load_num can not fully fit in vector register, whether fill_value should be filled as default values. + * fill_value: when load_num can not fully fit in vector register, what values should be filled as default values. + * currently support "zero", "int_one", "float_one", "int32_min", "float_min", "int32_max" and "float_max". 
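// A small usage sketch for the load_emitter_context defined above (the enclosing kernel code,
// register choice, and the emitter's public emit call are omitted here and depend on the calling
// MKLDNN node); the chosen precisions and counts are illustrative.
#include <string>
#include "jit_load_store_emitters.h"

static MKLDNNPlugin::load_emitter_context make_u8_to_fp32_tail_ctx() {
    using InferenceEngine::Precision;
    // Load 16 U8 values, widen them to FP32, and zero-fill any lanes past the tail.
    return MKLDNNPlugin::load_emitter_context(Precision::U8, Precision::FP32, /*load_num=*/16,
                                              /*is_fill=*/true, /*fill_value=*/"zero", /*offset_byte=*/0);
}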
+ * supported src_prc and dst_prc pairs are as below(x indicate for support): + * FP32 I32 I16 U16 I8 U8 BF16 --> src_prc + * FP32 x x x x x x x + * I32 x x x x x x x + * I16 x + * U16 x + * I8 x + * U8 x + * BF16 x + * | + * \|/ + * dst_prc + */ + void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; + + size_t get_inputs_num() override; + +private: + template + void emit_isa(const Xbyak::Reg64 ®_src, int offset_byte, InferenceEngine::Precision src_prc, + const int out_vec_idx, InferenceEngine::Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero") const; + + template + void load_bytes(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, int load_size, + bool is_fill = false, std::string fill_value = "zero") const; + + template + void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, bool is_signed, int load_size, + bool is_fill = false, std::string fill_value = "zero") const; + + template + void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, bool is_bf16, bool is_signed, int load_size, + bool is_fill = false, std::string fill_value = "zero") const; + + template + void fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const; + + void register_table_entries() override; + + size_t aux_gprs_count() const override; + + int v_len_elt; // 4/8/16 +}; + +class jit_store_emitter : public jit_emitter { +public: + jit_store_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node, + InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr); + + /** + * store_num values with src_prc in Vmm[in_vec_idx] is stored to ptr[reg_dst + offset_byte] address as dst_prc data. 
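// The support matrices for these emitters reduce to one rule, restated here as an illustrative
// predicate (not an API added by this change): the vector-side precision must be FP32 or I32, or
// the two precisions must match; FP32/I32 -> BF16 on the store side additionally requires an
// avx512-core capable platform, as the matrix that follows notes.
#include <ie_precision.hpp>

static bool conversion_pair_supported(InferenceEngine::Precision vec_side, InferenceEngine::Precision mem_side) {
    using InferenceEngine::Precision;
    // vec_side is dst_prc for loads and src_prc for stores; mem_side is the in-memory precision.
    return vec_side == mem_side || vec_side == Precision::FP32 || vec_side == Precision::I32;
}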
+ * supported src_prc and dst_prc pairs are as below(x indicate for support): + * FP32 I32 I16 U16 I8 U8 BF16 --> src_prc + * FP32 x x + * I32 x x + * I16 x x x + * U16 x x x + * I8 x x x + * U8 x x x + * BF16 x* x* x + * \|/ + * dst_prc + * note: FP32/I32-->BF16(x*) is supported only on at least avx512-core plateform + */ + void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; + + size_t get_inputs_num() override; + + std::shared_ptr get_emu_vcvtneps2bf16() const { + return emu_vcvtneps2bf16; + } + +private: + template + void emit_isa(const int in_vec_idx, InferenceEngine::Precision src_prc, + const Xbyak::Reg64 ®_dst, int offset_byte, InferenceEngine::Precision dst_prc, int store_num) const; + + template + void store_bytes(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, int store_size) const; + + template + void store_dword_to_byte_extension(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, bool is_signed, int store_size) const; + + template + void store_dword_to_word_extension(const Vmm &vmm, const Xbyak::Reg64 ®, int offset, bool is_bf16, bool is_signed, int store_size) const; + + size_t aux_gprs_count() const override; + size_t aux_vecs_count() const override; + + int v_len_elt; // 4/8/16 + std::shared_ptr emu_vcvtneps2bf16; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp deleted file mode 100644 index 87e688684a1d3f..00000000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class CTCGreedyDecoderImpl: public ExtLayerBase { -public: - explicit CTCGreedyDecoderImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.size() != 1) - THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - - std::vector inps; - inps.resize(layer->insData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32)); - addConfig(layer, inps, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); - } catch (InferenceEngine::details::InferenceEngineException &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - if ((inputs.size() != 1 && inputs.size() != 2) || outputs.empty()) { - if (resp) { - std::string errorMsg = "Incorrect number of input or output edges!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - const float* probabilities = inputs[0]->buffer(); - const float* sequence_indicators = inputs[1]->buffer(); - float* output_sequences = outputs[0]->buffer(); - - size_t T_ = inputs[0]->getTensorDesc().getDims()[0]; - size_t N_ = inputs[0]->getTensorDesc().getDims()[1]; - size_t C_ = inputs[0]->getTensorDesc().getDims()[2]; - - // Fill output_sequences with -1 - for (size_t ii = 0; ii < T_*N_; ii++) { - output_sequences[ii] = -1; - } - - for (size_t n = 0; n < N_; ++n) { - int prev_class_idx = -1; - size_t output_index = n*T_; - - for (int t = 0; /* check at end */; ++t) { - // get maximum probability and its index - int max_class_idx = 0; - - const float* probs = probabilities + t*C_*N_ + n*C_; - float max_prob = probs[0]; - 
++probs; - - for (size_t c = 1; c < C_; ++c, ++probs) { - if (*probs > max_prob) { - max_class_idx = static_cast(c); - max_prob = *probs; - } - } - - if (max_class_idx < static_cast(C_) - 1 && - max_class_idx != prev_class_idx) { - output_sequences[output_index] = static_cast(max_class_idx); - output_index++; - } - - prev_class_idx = max_class_idx; - - if (t + 1 == static_cast(T_) || sequence_indicators[(t + 1)*N_ + n] == 0) { - break; - } - } - } - return OK; - } -}; - -REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp new file mode 100644 index 00000000000000..ae8d5466e53a2c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "base.hpp" +#include "ie_parallel.hpp" + +#include +#include + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class CTCGreedyDecoderImpl: public ExtLayerBase { +public: + explicit CTCGreedyDecoderImpl(const CNNLayer* layer) : mergeRepeated_(true) { + std::string errPrefix = "CTCGreedyDecoder layer with name '" + layer->name + "' "; + if (layer->insData.size() != 2) + THROW_IE_EXCEPTION << errPrefix << "has invalid number of input edges: " << layer->insData.size(); + if (layer->outData.size() != 1) + THROW_IE_EXCEPTION << errPrefix << "has invalid number of outputs edges: " << layer->outData.size(); + + auto inData = layer->insData[DATA_INDEX].lock(); + auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock(); + if (!inData || !sequenceLenData) + THROW_IE_EXCEPTION << errPrefix << "has nullable inputs."; + if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0] && + inData->getTensorDesc().getDims()[1] != sequenceLenData->getTensorDesc().getDims()[1]) + THROW_IE_EXCEPTION << errPrefix << "has invalid input shapes."; + if (inData->getTensorDesc().getPrecision() != Precision::FP32 && + inData->getTensorDesc().getPrecision() != Precision::BF16) + THROW_IE_EXCEPTION << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision(); + if (sequenceLenData->getTensorDesc().getPrecision() != Precision::FP32 && + inData->getTensorDesc().getPrecision() != Precision::BF16) + THROW_IE_EXCEPTION << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision(); + + std::vector inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::FP32}}; + std::vector outputConfigs{{ConfLayout::PLN, Precision::FP32}}; + addConfig(layer, inputConfigs, outputConfigs); + + if (layer->CheckParamPresence("ctc_merge_repeated")) { + mergeRepeated_ = layer->GetParamAsBool("ctc_merge_repeated"); + } else if (layer->CheckParamPresence("merge_repeated")) { + mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, + ResponseDesc *resp) noexcept override { + const float* probabilities = inputs[DATA_INDEX]->cbuffer().as() + + inputs[DATA_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const float* sequenceMask = inputs[SEQUENCE_LENGTH_INDEX]->cbuffer().as() + + inputs[SEQUENCE_LENGTH_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* 
outputSequences = outputs[0]->buffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + const size_t T = inputs[DATA_INDEX]->getTensorDesc().getDims()[0]; + const size_t B = inputs[DATA_INDEX]->getTensorDesc().getDims()[1]; + const int C = inputs[DATA_INDEX]->getTensorDesc().getDims()[2]; + const size_t BC = B * C; + const size_t CB1 = C * (B - 1); + + const int blankIndex = C - 1; + + std::vector sequenceLengths(B, 0); + parallel_for(B, [&](size_t b) { + size_t t = 0; + for (; t < T; t++) { + if (sequenceMask[B * t + b] == 0.f) + break; + } + sequenceLengths[b] = t; + }); + + size_t workAmount = 0; + for (size_t b = 0; b < B; b++) { + workAmount += sequenceLengths[b]; + } + + // Parallelization could not be made directly by T due to output index depends on merged classes and + // blank index, thus could not be shared between threads. Better to divide operation on two steps. + // At the first stage find the maximum index. At second stage merge if needed. + // Such approach makes parallelization more efficient. + auto threadBody = [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + if (start >= end) + return; + size_t tStart = 0lu, bStart = 0lu; + for (; bStart < B; bStart++) { + tStart += sequenceLengths[bStart]; + if (tStart >= start) { + tStart = start - (tStart - sequenceLengths[bStart]); + break; + } + } + + size_t workCounter = start; + + for (size_t b = bStart; b < B; ++b) { + size_t outputIndex = b * T + tStart; + const float* probs = probabilities + b * C + BC * tStart; + size_t sequenceLength = sequenceLengths[b]; + + for (size_t t = tStart; t < sequenceLength; ++t) { + int maxClassIdx = 0; + + float maxProb = probs[0]; + ++probs; + + for (int c = 1; c < C; ++c, ++probs) { + if (*probs > maxProb) { + maxClassIdx = c; + maxProb = *probs; + } + } + probs += CB1; + outputSequences[outputIndex++] = static_cast(maxClassIdx); + + if (++workCounter >= end) { + return; + } + } + tStart = 0lu; + } + }; // thread body + + parallel_nt(0, threadBody); + + parallel_for(B, [&](size_t b) { + int prevClassIdx = -1; + size_t outputIndex = b * T; + const size_t sequenceLength = sequenceLengths[b]; + float* shiftedOut = outputSequences + b * T; + for (size_t t = 0; t < sequenceLength; ++t) { + if (*shiftedOut < blankIndex && + !(mergeRepeated_ && *shiftedOut == prevClassIdx)) { + outputSequences[outputIndex++] = *shiftedOut; + } + prevClassIdx = *shiftedOut; + shiftedOut++; + } + std::fill(outputSequences + outputIndex, outputSequences + (b + 1) * T, -1.f); + }); + + return OK; + } + +private: + const size_t DATA_INDEX = 0lu; + const size_t SEQUENCE_LENGTH_INDEX = 1lu; + bool mergeRepeated_; +}; + +REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp new file mode 100644 index 00000000000000..95d35f4d9b73ca --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp @@ -0,0 +1,162 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "base.hpp" +#include "ie_parallel.hpp" + +#include +#include + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class CTCGreedyDecoderSeqLenImpl: public ExtLayerBase { +public: + explicit CTCGreedyDecoderSeqLenImpl(const CNNLayer* 
layer) : mergeRepeated_(true) { + std::string errPrefix = "CTCGreedyDecoderSeqLen layer with name '" + layer->name + "' "; + if (layer->insData.size() < 2 || layer->insData.size() > 3) + THROW_IE_EXCEPTION << errPrefix << "has invalid number of input edges: " << layer->insData.size(); + if (layer->outData.size() != 2) + THROW_IE_EXCEPTION << errPrefix << "has invalid number of outputs edges: " << layer->outData.size(); + + auto inData = layer->insData[DATA_INDEX].lock(); + auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock(); + if (!inData || !sequenceLenData) + THROW_IE_EXCEPTION << errPrefix << "has nullable inputs."; + if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0]) + THROW_IE_EXCEPTION << errPrefix << "has invalid input shapes."; + if (inData->getTensorDesc().getPrecision() != Precision::FP32 && + inData->getTensorDesc().getPrecision() != Precision::BF16) + THROW_IE_EXCEPTION << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision(); + if (sequenceLenData->getTensorDesc().getPrecision() != Precision::I32 && + sequenceLenData->getTensorDesc().getPrecision() != Precision::I64) + THROW_IE_EXCEPTION << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision(); + + std::vector inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::I32}}; + + if (layer->insData.size() > BLANK_INDEX) { + auto blankIndexData = layer->insData[BLANK_INDEX].lock(); + if (!blankIndexData) + THROW_IE_EXCEPTION << errPrefix << "has nullable inputs."; + if (blankIndexData->getTensorDesc().getPrecision() != Precision::I32 && + blankIndexData->getTensorDesc().getPrecision() != Precision::I64) + THROW_IE_EXCEPTION << errPrefix << "has unsupported 'blank_index' input precision: " << blankIndexData->getTensorDesc().getPrecision(); + inputConfigs.push_back({ConfLayout::PLN, Precision::I32}); + } + std::vector outputConfigs{{ConfLayout::PLN, Precision::I32}, {ConfLayout::PLN, Precision::I32}}; + addConfig(layer, inputConfigs, outputConfigs); + + mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true); + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, + ResponseDesc *resp) noexcept override { + const float* probabilities = inputs[DATA_INDEX]->cbuffer().as() + + inputs[DATA_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const int* sequenceLengths = inputs[SEQUENCE_LENGTH_INDEX]->cbuffer().as() + + inputs[SEQUENCE_LENGTH_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + int* decodedClasses = outputs[DECODED_CLASSES_INDEX]->buffer().as() + + outputs[DECODED_CLASSES_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + int* decodedClassesLength = outputs[DECODED_CLASSES_LENGTH_INDEX]->buffer().as() + + outputs[DECODED_CLASSES_LENGTH_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + const auto& inDims = inputs[DATA_INDEX]->getTensorDesc().getDims(); + const size_t B = inDims[0]; + const size_t T = inDims[1]; + const int C = inDims[2]; + const size_t TC = T * C; + + int blankIndex = C - 1; + if (inputs.size() > BLANK_INDEX) + blankIndex = (inputs[BLANK_INDEX]->cbuffer().as() + + inputs[BLANK_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]; + + size_t workAmount = 0; + for (size_t b = 0; b < B; b++) { + workAmount += sequenceLengths[b]; + } + // Parallelization could not be made directly by T due to output index depends on merged classes and + // blank 
index, thus could not be shared between threads. Better to divide operation on two steps. + // At the first stage find the maximum index. At second stage merge if needed. + // Such approach makes parallelization more efficient. + auto threadBody = [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + if (start >= end) + return; + size_t tStart = 0lu, bStart = 0lu; + for (; bStart < B; bStart++) { + tStart += sequenceLengths[bStart]; + if (tStart >= start) { + tStart = start - (tStart - sequenceLengths[bStart]); + break; + } + } + + size_t workCounter = start; + + for (size_t b = bStart; b < B; ++b) { + size_t outputIndex = b * T + tStart; + const float* probs = probabilities + b * TC + C * tStart; + const size_t actualSeqLen = sequenceLengths[b]; + + for (size_t t = tStart; t < actualSeqLen; ++t) { + int maxClassIdx = 0; + float maxProb = probs[0]; + probs++; + + for (int c = 1; c < C; c++, probs++) { + if (*probs > maxProb) { + maxClassIdx = c; + maxProb = *probs; + } + } + decodedClasses[outputIndex++] = maxClassIdx; + + if (++workCounter >= end) { + return; + } + } + tStart = 0lu; + } + }; // thread body + + parallel_nt(0, threadBody); + + parallel_for(B, [&](size_t b) { + int prevClassIdx = -1; + size_t outputIndex = b * T; + const size_t actualSeqLen = sequenceLengths[b]; + int* shiftedOut = decodedClasses + b * T; + + for (size_t t = 0; t < actualSeqLen; ++t) { + if (*shiftedOut != blankIndex && + !(mergeRepeated_ && *shiftedOut == prevClassIdx)) { + decodedClasses[outputIndex++] = *shiftedOut; + } + prevClassIdx = *shiftedOut; + shiftedOut++; + } + std::fill(decodedClasses + outputIndex, decodedClasses + (b + 1) * T, -1); + decodedClassesLength[b] = outputIndex - b * T; + }); + + return OK; + } + +private: + const size_t DATA_INDEX = 0lu; + const size_t SEQUENCE_LENGTH_INDEX = 1lu; + const size_t BLANK_INDEX = 2lu; + const size_t DECODED_CLASSES_INDEX = 0lu; + const size_t DECODED_CLASSES_LENGTH_INDEX = 1lu; + bool mergeRepeated_; +}; + +REG_FACTORY_FOR(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp index 03a4f2dbc6f171..fccbefe1e34c46 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp @@ -16,11 +16,11 @@ namespace Cpu { class CumSumImpl: public ExtLayerBase { enum { CUM_SUM_DATA, AXIS, numOfInputs }; - enum { N, C, D, H, W, numOfDims }; bool exclusive; bool reverse; + size_t numOfDims; size_t axis = 0; - std::vector shape5d; + std::vector shape; public: explicit CumSumImpl(const CNNLayer* layer) { @@ -31,9 +31,10 @@ class CumSumImpl: public ExtLayerBase { const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc(); const auto &dataShape = dataTensor.getDims(); - if (dataShape.size() < 1 || dataShape.size() > 5) { + if (dataShape.size() < 1) { THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size(); } + numOfDims = dataShape.size(); exclusive = layer->GetParamAsBool("exclusive", false); reverse = layer->GetParamAsBool("reverse", false); @@ -57,7 +58,7 @@ class CumSumImpl: public ExtLayerBase { if (dataShape != layer->outData[0]->getTensorDesc().getDims()) THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' 
input and output dimensions"; - shape5d = get5dShape(dataShape); + shape = dataShape; LayerConfig config; for (size_t i = 0; i < layer->insData.size(); i++) { @@ -65,7 +66,7 @@ class CumSumImpl: public ExtLayerBase { inConfig.inPlace = -1; inConfig.constant = false; - Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision(); + Precision inPrecision = i == 1 ? Precision(Precision::I32) : layer->insData[i].lock()->getTensorDesc().getPrecision(); if (inPrecision == Precision::BF16) inPrecision = Precision::FP32; const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims(); @@ -120,75 +121,121 @@ class CumSumImpl: public ExtLayerBase { void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) { const auto *input = _input->cbuffer().as() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding(); auto *output = _output->buffer().as() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis]; + const std::vector strides = _input->getTensorDesc().getBlockingDesc().getStrides(); if (reverse) { if (exclusive) { - cumSum(input, output, offset); + cumSum(input, output, strides); } else { - cumSum(input, output, offset); + cumSum(input, output, strides); } } else { if (exclusive) { - cumSum(input, output, offset); + cumSum(input, output, strides); } else { - cumSum(input, output, offset); + cumSum(input, output, strides); } } } template - void cumSum(const dataType *input, dataType *output, const size_t &offset) { - std::vector iterationRange(numOfDims - 1); + void cumSum(const dataType *input, dataType *output, const std::vector &strides) { + SizeVector iterationRange(numOfDims - 1); size_t j = 0; - for (size_t i = 0; i < shape5d.size(); i++) { + for (size_t i = 0; i < shape.size(); i++) { if (i == axis) continue; - iterationRange[j++] = shape5d[i]; + iterationRange[j++] = shape[i]; } - parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) { - std::vector forStartOffset; - forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3); - forStartOffset.insert(forStartOffset.begin() + axis, 0); - size_t startOffset = getStartOffset(forStartOffset); - - const dataType *inputStart = input + startOffset; - dataType *outputStart = output + startOffset; - - if (reverse) { - if (exclusive) { - outputStart[offset*(shape5d[axis] - 1)] = 0; - for (int64_t i = shape5d[axis] - 2; i >= 0; i--) { - outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset]; - } - } else { - outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)]; - for (int64_t i = shape5d[axis] - 2; i >= 0; i--) { - outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset]; + size_t work_amount_dst = std::accumulate(iterationRange.begin(), iterationRange.end(), 1, std::multiplies()); + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t start = 0, end = 0; + SizeVector counters(numOfDims - 1, 0); + splitter(work_amount_dst, nthr, ithr, start, end); + + parallelItInit(start, counters, iterationRange); + + for (size_t iwork = start; iwork < end; ++iwork) { + std::vector forStartOffset(numOfDims); + forStartOffset[axis] = 0; + for (int64_t offsetIdx = 0, countersIdx = 0; offsetIdx < numOfDims; ++offsetIdx) { + if (offsetIdx == axis) { + continue; } + forStartOffset[offsetIdx] = 
counters[countersIdx++]; } - } else { - if (exclusive) { - outputStart[0] = 0; - for (size_t i = 1; i < shape5d[axis]; i++) { - outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset]; + + size_t startOffset = getStartOffset(forStartOffset, strides); + + const dataType *inputStart = input + startOffset; + dataType *outputStart = output + startOffset; + + size_t offset = strides[axis]; + if (reverse) { + if (exclusive) { + outputStart[offset*(shape[axis] - 1)] = 0; + for (int64_t i = shape[axis] - 2; i >= 0; i--) { + outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset]; + } + } else { + outputStart[offset*(shape[axis] - 1)] = inputStart[offset * (shape[axis] - 1)]; + for (int64_t i = shape[axis] - 2; i >= 0; i--) { + outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset]; + } } } else { - outputStart[0] = inputStart[0]; - for (size_t i = 1; i < shape5d[axis]; i++) { - outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset]; + if (exclusive) { + outputStart[0] = 0; + for (size_t i = 1; i < shape[axis]; i++) { + outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset]; + } + } else { + outputStart[0] = inputStart[0]; + for (size_t i = 1; i < shape[axis]; i++) { + outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset]; + } } } + + parallelItStep(counters, iterationRange); } }); } - size_t getStartOffset(std::vector &forStartOffset) { - return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] + - forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W]; + void parallelItInit(size_t start, std::vector& counters, const std::vector& iterationRange) { + auto itCounter = counters.rbegin(); + auto itWork = iterationRange.rbegin(); + while (itCounter != counters.rend()) { + *itCounter = start % *itWork; + start /= *itWork; + ++itCounter; + ++itWork; + } } - size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) { + inline void parallelItStep(std::vector& counters, const std::vector& iterationRange) { + auto itCounter = counters.rbegin(); + auto itWork = iterationRange.rbegin(); + + while (itCounter != counters.rend()) { + *itCounter = (*itCounter + 1) % *itWork; + if (*itCounter != 0) { + break; + } + ++itCounter; + ++itWork; + } + } + + inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const { + size_t startOffset = 0; + for (size_t idx = 0; idx < forStartOffset.size(); ++idx) { + startOffset += forStartOffset[idx] * strides[idx]; + } + return startOffset; + } + + size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) const { const auto& axisPrecision = _axis->getTensorDesc().getPrecision(); const int64_t dataShapeSize = static_cast(_data->getTensorDesc().getDims().size()); int64_t axisValueFromBlob; @@ -212,13 +259,6 @@ class CumSumImpl: public ExtLayerBase { return axisValueFromBlob >= 0 ? 
axisValueFromBlob : (axisValueFromBlob + dataShapeSize); } - std::vector get5dShape(const SizeVector& dims) { - std::vector shape5d(numOfDims, 1); - for (size_t i = 0; i < dims.size(); i++) - shape5d[i] = dims[i]; - return shape5d; - } - private: std::string layerName; }; @@ -227,4 +267,4 @@ REG_FACTORY_FOR(CumSumImpl, CumSum); } // namespace Cpu } // namespace Extensions -} // namespace InferenceEngine +} // namespace InferenceEngine \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp index 2aedeedaec4312..447c812f5ac191 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp @@ -25,7 +25,8 @@ jit_add_emitter::jit_add_emitter(jit_generator *host, cpu_isa_t host_isa, const size_t jit_add_emitter::get_inputs_num() { return 2; } void jit_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -59,7 +60,8 @@ jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_mul_add_emitter::get_inputs_num() { return 3; } void jit_mul_add_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -118,7 +120,8 @@ jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_i size_t jit_subtract_emitter::get_inputs_num() { return 2; } void jit_subtract_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -153,7 +156,8 @@ jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_i size_t jit_multiply_emitter::get_inputs_num() { return 2; } void jit_multiply_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -188,7 +192,8 @@ jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, size_t jit_divide_emitter::get_inputs_num() { return 2; } void jit_divide_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if 
(host_isa_ == cpu::x64::avx2) { @@ -252,7 +257,8 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host size_t jit_floor_mod_emitter::get_inputs_num() { return 2; } void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -301,7 +307,8 @@ jit_mod_emitter::jit_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const size_t jit_mod_emitter::get_inputs_num() { return 2; } void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -350,7 +357,8 @@ jit_maximum_emitter::jit_maximum_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_maximum_emitter::get_inputs_num() { return 2; } void jit_maximum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -397,7 +405,8 @@ jit_minimum_emitter::jit_minimum_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_minimum_emitter::get_inputs_num() { return 2; } void jit_minimum_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -444,7 +453,8 @@ jit_squared_difference_emitter::jit_squared_difference_emitter(jit_generator *ho size_t jit_squared_difference_emitter::get_inputs_num() { return 2; } void jit_squared_difference_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -482,7 +492,8 @@ jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_is size_t jit_power_dynamic_emitter::get_inputs_num() { return 2; } void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -590,7 +601,8 @@ jit_equal_emitter::jit_equal_emitter(jit_generator *host, cpu_isa_t host_isa, co size_t 
jit_equal_emitter::get_inputs_num() { return 2; } void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -646,7 +658,8 @@ jit_not_equal_emitter::jit_not_equal_emitter(jit_generator *host, cpu_isa_t host size_t jit_not_equal_emitter::get_inputs_num() { return 2; } void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -702,7 +715,8 @@ jit_greater_emitter::jit_greater_emitter(jit_generator *host, cpu_isa_t host_isa size_t jit_greater_emitter::get_inputs_num() { return 2; } void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -758,7 +772,8 @@ jit_greater_equal_emitter::jit_greater_equal_emitter(jit_generator *host, cpu_is size_t jit_greater_equal_emitter::get_inputs_num() { return 2; } void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -814,7 +829,8 @@ jit_less_emitter::jit_less_emitter(jit_generator *host, cpu_isa_t host_isa, cons size_t jit_less_emitter::get_inputs_num() { return 2; } void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -870,7 +886,8 @@ jit_less_equal_emitter::jit_less_equal_emitter(jit_generator *host, cpu_isa_t ho size_t jit_less_equal_emitter::get_inputs_num() { return 2; } void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -927,7 +944,8 @@ jit_logical_and_emitter::jit_logical_and_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_and_emitter::get_inputs_num() { return 2; } void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector 
&pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -1004,7 +1022,8 @@ jit_logical_or_emitter::jit_logical_or_emitter(jit_generator *host, cpu_isa_t ho size_t jit_logical_or_emitter::get_inputs_num() { return 2; } void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -1080,7 +1099,8 @@ jit_logical_xor_emitter::jit_logical_xor_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_xor_emitter::get_inputs_num() { return 2; } void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -1156,7 +1176,8 @@ jit_logical_not_emitter::jit_logical_not_emitter(jit_generator *host, cpu_isa_t size_t jit_logical_not_emitter::get_inputs_num() { return 1; } void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -1211,7 +1232,8 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_ size_t jit_power_static_emitter::get_inputs_num() { return 1; } void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { @@ -1399,7 +1421,8 @@ jit_prelu_emitter::jit_prelu_emitter(jit_generator *host, cpu_isa_t host_isa, co size_t jit_prelu_emitter::get_inputs_num() { return 2; } void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) { + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) { if (host_isa_ == cpu::x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == cpu::x64::avx2) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp index 488a5ff808a13a..fb8d2e16fb1480 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp @@ -19,7 +19,8 @@ class jit_add_emitter : public jit_emitter { private: void emit_impl(const std::vector 
&in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -34,7 +35,8 @@ class jit_mul_add_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -52,7 +54,8 @@ class jit_subtract_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -68,7 +71,8 @@ class jit_multiply_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -85,7 +89,8 @@ class jit_divide_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -102,7 +107,8 @@ class jit_floor_mod_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -119,7 +125,8 @@ class jit_mod_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -137,7 +144,8 @@ class jit_maximum_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -154,7 +162,8 @@ class jit_minimum_emitter : public 
jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -170,7 +179,8 @@ class jit_squared_difference_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -186,7 +196,8 @@ class jit_power_dynamic_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -202,7 +213,8 @@ class jit_equal_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -221,7 +233,8 @@ class jit_not_equal_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -240,7 +253,8 @@ class jit_greater_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -259,7 +273,8 @@ class jit_greater_equal_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -278,7 +293,8 @@ class jit_less_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const 
std::vector &out_vec_idxs) const; @@ -297,7 +313,8 @@ class jit_less_equal_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -316,7 +333,8 @@ class jit_logical_and_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -335,7 +353,8 @@ class jit_logical_or_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -354,7 +373,8 @@ class jit_logical_xor_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -372,7 +392,8 @@ class jit_logical_not_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -390,7 +411,8 @@ class jit_power_static_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; @@ -408,7 +430,8 @@ class jit_prelu_emitter : public jit_emitter { private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp index 4ca9038d6b5159..dd3bd9daa3c148 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/jit_mkldnn_emitters.hpp @@ 
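All of the header changes above apply one and the same signature extension, so a single sketch covers them (the template arguments, assumed here to be size_t, are not visible in this rendering of the diff):

    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
                   const emitter_context *emit_context) override;

The emitter bodies in jit_eltwise_emitters.cpp above do not read emit_context; the pointer appears to be threaded through so that emitters which do need call-site state can share the same interface.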
-20,10 +20,14 @@ class jit_mkldnn_emitter : public jit_emitter { size_t get_inputs_num() override; void emit(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) override; + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) override; void emit_table() override; + void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context = nullptr) override {}; + private: std::shared_ptr> eltwise_injector_sse42; std::shared_ptr> eltwise_injector_avx2; diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp index f6622b13d4c7bd..fa132709c809fc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp @@ -71,6 +71,7 @@ MKLDNN_EXTENSION_NODE(GRNImpl, GRN); MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows); MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize); MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder); +MKLDNN_EXTENSION_NODE(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen); MKLDNN_EXTENSION_NODE(GatherImpl, Gather); MKLDNN_EXTENSION_NODE(GatherElementsImpl, GatherElements); MKLDNN_EXTENSION_NODE(GatherNDImpl, GatherND); diff --git a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp index d95309afc4797f..01872db73d7510 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp @@ -26,9 +26,6 @@ class LogSoftmaxImpl: public ExtLayerBase { if (layer->insData.size() != 1) THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; - if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32) - THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. 
Only FP32 is supported!"; - SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims(); if (!dims.size()) dims = SizeVector(1, 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 94d2fc6b87ba62..caf74aa85952c3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -29,13 +29,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { return; InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32) + if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); precision = getCnnLayer()->outData[0]->getPrecision(); - if (precision != InferenceEngine::Precision::FP32) + if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) + inputDataType = outputDataType = memory::data_type::bf16; if (getParentEdges().empty() || getParentEdges().size() > 3) THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 88383ac782d77b..438a0161dc3440 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -652,17 +652,29 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu break; case Precision::I16: if (isa == x64::avx512_common) { - vmaxps(vmm_dst, vmm_zero, vmm_dst); - vpmovusdw(op, vmm_dst); + vpmovsdw(op, vmm_dst); } else { - uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); + uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + uni_vmovdqu(op, xmm_dst); + } else { + movq(op, xmm_dst); + } } break; case Precision::U16: if (isa == x64::avx512_common) { - vpmovsdw(op, vmm_dst); + vmaxsd(vmm_dst, vmm_zero, vmm_dst); + vpmovusdw(op, vmm_dst); } else { - uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); + uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + uni_vmovdqu(op, xmm_dst); + } else { + movq(op, xmm_dst); + } } break; case Precision::I8: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 6b6f19f29d2960..b9dec20c50eb2c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -203,6 +203,10 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi Xmm xmm_weightL = Xmm(13); Vmm vmm_weightR = Vmm(12); Xmm xmm_weightR = Xmm(12); + Vmm vmm_weightF = Vmm(6); + Xmm xmm_weightF = Xmm(6); + Vmm vmm_weightE = Vmm(7); + Xmm xmm_weightE = Xmm(7); Vmm vmm_valTL = Vmm(11); Xmm xmm_valTL = Xmm(11); Vmm vmm_valTR = vmm_val; @@ -459,24 +463,48 @@ struct jit_uni_interpolate_kernel_f32 : public 
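For reference, the I16/U16 store change in mkldnn_eltwise_node.cpp above swaps the two saturation paths: I16 now uses signed saturation (vpmovsdw / uni_vpackssdw) and U16 clamps at zero before unsigned saturation (vpmovusdw / uni_vpackusdw), with the added vpermq / movq instructions only reordering and narrowing the packed lanes on AVX2 and SSE4.1. A scalar equivalent of the corrected conversions, illustrative only:

    #include <algorithm>
    #include <cstdint>
    // signed saturation of int32 to int16
    static int16_t  to_int16(int32_t v)  { return static_cast<int16_t>(std::min(32767, std::max(-32768, v))); }
    // clamp at zero, then unsigned saturation of int32 to uint16
    static uint16_t to_uint16(int32_t v) { return static_cast<uint16_t>(std::min(65535, std::max(0, v))); }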
jit_uni_interpolate_kernel, publi void linear_onnx_c_gathered() { mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); - + // load weight mov(reg_src, ptr[reg_params + GET_OFF(weight_ptr[0])]); mov(reg_src_aux, ptr[reg_params + GET_OFF(weight_ptr[0]) + sizeof(size_t)]); - mov(reg_src_aux1, ptr[reg_params + GET_OFF(weight_ptr[0]) + 2 * sizeof(size_t)]); - mov(reg_src_aux2, ptr[reg_params + GET_OFF(weight_ptr[0]) + 3 * sizeof(size_t)]); uni_vbroadcastss(vmm_weightL, ptr[reg_src]); uni_vbroadcastss(vmm_weightR, ptr[reg_src_aux]); - uni_vbroadcastss(vmm_weightT, ptr[reg_src_aux1]); - uni_vbroadcastss(vmm_weightB, ptr[reg_src_aux2]); - + if (jcp_.spatial_dim_size > 1) { + mov(reg_src_aux1, ptr[reg_params + GET_OFF(weight_ptr[0]) + 2 * sizeof(size_t)]); + mov(reg_src_aux2, ptr[reg_params + GET_OFF(weight_ptr[0]) + 3 * sizeof(size_t)]); + uni_vbroadcastss(vmm_weightT, ptr[reg_src_aux1]); + uni_vbroadcastss(vmm_weightB, ptr[reg_src_aux2]); + } + if (jcp_.spatial_dim_size > 2) { + mov(reg_src, ptr[reg_params + GET_OFF(weight_ptr[0]) + 4 * sizeof(size_t)]); + mov(reg_src_aux, ptr[reg_params + GET_OFF(weight_ptr[0]) + 5 * sizeof(size_t)]); + uni_vbroadcastss(vmm_weightF, ptr[reg_src]); + uni_vbroadcastss(vmm_weightE, ptr[reg_src_aux]); + } + // load src mov(reg_src, ptr[reg_params + GET_OFF(src_ptr[0])]); mov(reg_src_aux, ptr[reg_params + GET_OFF(src_ptr[0]) + sizeof(size_t)]); - mov(reg_src_aux1, ptr[reg_params + GET_OFF(src_ptr[0]) + 2 * sizeof(size_t)]); - mov(reg_src_aux2, ptr[reg_params + GET_OFF(src_ptr[0]) + 3 * sizeof(size_t)]); + if (jcp_.spatial_dim_size > 1) { + mov(reg_src_aux1, ptr[reg_params + GET_OFF(src_ptr[0]) + 2 * sizeof(size_t)]); + mov(reg_src_aux2, ptr[reg_params + GET_OFF(src_ptr[0]) + 3 * sizeof(size_t)]); + } + Xbyak::Reg64 reg_src_aux4 = r14; + Xbyak::Reg64 reg_src_aux5 = rdx; + Xbyak::Reg64 reg_src_aux6 = rsi; + Xbyak::Reg64 reg_src_aux7 = rbp; + if (jcp_.spatial_dim_size > 2) { + mov(reg_src_aux4, ptr[reg_params + GET_OFF(src_ptr[0]) + 4 * sizeof(size_t)]); + mov(reg_src_aux5, ptr[reg_params + GET_OFF(src_ptr[0]) + 5 * sizeof(size_t)]); + mov(reg_src_aux6, ptr[reg_params + GET_OFF(src_ptr[0]) + 6 * sizeof(size_t)]); + mov(reg_src_aux7, ptr[reg_params + GET_OFF(src_ptr[0]) + 7 * sizeof(size_t)]); + } mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); int step = vlen / sizeof(float); int blk = (isa == cpu::x64::sse41) ? (2 * step) : step; + int dst_stride = (jcp_.layout == InterpolateLayoutType::by_channel) ? (step * jcp_.dst_data_size) : + (blk * jcp_.OW * jcp_.OH * jcp_.OD * jcp_.dst_data_size); + int src_stride = (jcp_.layout == InterpolateLayoutType::by_channel) ? 
(step * jcp_.src_data_size) : + (blk * jcp_.IW * jcp_.IH * jcp_.ID * jcp_.src_data_size); Xbyak::Label main_loop_label; Xbyak::Label main_loop_end_label; @@ -493,13 +521,31 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi cmp(reg_work_amount, 1); jl(tail_loop_end_label, T_NEAR); } - + // progressive manner load_vector(vmm_valTL, ptr[reg_src], jcp_.src_dt); load_vector(vmm_valTR, ptr[reg_src_aux], jcp_.src_dt); - load_vector(vmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); - load_vector(vmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); - - linear_onnx_worker(); + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + load_vector(vmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); + load_vector(vmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // temporally save front result to temp_vmm + + load_vector(vmm_valTL, ptr[reg_src_aux4], jcp_.src_dt); + load_vector(vmm_valTR, ptr[reg_src_aux5], jcp_.src_dt); + load_vector(vmm_valBL, ptr[reg_src_aux6], jcp_.src_dt); + load_vector(vmm_valBR, ptr[reg_src_aux7], jcp_.src_dt); + + // 2d for end depth + linear_onnx_worker_2d(); + // 3th dimension + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, vmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, false); // vmm_val is vmm_valTR @@ -511,10 +557,28 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi int sse42_offset = 4; // vmm is xmm here load_vector(vmm_valTL, ptr[reg_src + sse42_offset * jcp_.src_data_size], jcp_.src_dt); load_vector(vmm_valTR, ptr[reg_src_aux + sse42_offset * jcp_.src_data_size], jcp_.src_dt); - load_vector(vmm_valBL, ptr[reg_src_aux1 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); - load_vector(vmm_valBR, ptr[reg_src_aux2 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); - - linear_onnx_worker(); + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + load_vector(vmm_valBL, ptr[reg_src_aux1 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + load_vector(vmm_valBR, ptr[reg_src_aux2 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // temporally save front result to temp_vmm + + load_vector(vmm_valTL, ptr[reg_src_aux4 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + load_vector(vmm_valTR, ptr[reg_src_aux5 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + load_vector(vmm_valBL, ptr[reg_src_aux6 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + load_vector(vmm_valBR, ptr[reg_src_aux7 + sse42_offset * jcp_.src_data_size], jcp_.src_dt); + + // 2d for end depth + linear_onnx_worker_2d(); + // 3th dimension + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, vmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, false); @@ -522,24 +586,23 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi } store_vector(ptr[reg_dst + sse42_offset * jcp_.dst_data_size], vmm_valTR, jcp_.dst_dt); } - if (jcp_.layout == InterpolateLayoutType::by_channel) { - int dst_stride = step * jcp_.dst_data_size; - int src_stride = step * 
jcp_.src_data_size; - add(reg_dst, dst_stride); - add(reg_src, src_stride); - add(reg_src_aux, src_stride); + add(reg_dst, dst_stride); + add(reg_src, src_stride); + add(reg_src_aux, src_stride); + if (jcp_.spatial_dim_size > 1) { add(reg_src_aux1, src_stride); add(reg_src_aux2, src_stride); + } + if (jcp_.spatial_dim_size > 2) { + add(reg_src_aux4, src_stride); + add(reg_src_aux5, src_stride); + add(reg_src_aux6, src_stride); + add(reg_src_aux7, src_stride); + } + if (jcp_.layout == InterpolateLayoutType::by_channel) { sub(reg_work_amount, step); // work_amount is c } else { - int dst_stride = blk * jcp_.OW * jcp_.OH * jcp_.dst_data_size; - int src_stride = blk * jcp_.IW * jcp_.IH * jcp_.src_data_size; - add(reg_dst, dst_stride); - add(reg_src, src_stride); - add(reg_src_aux, src_stride); - add(reg_src_aux1, src_stride); - add(reg_src_aux2, src_stride); - sub(reg_work_amount, 1); // work_amount = div_up(c, blk), no tails + sub(reg_work_amount, 1); // work_amount = div_up(c, blk), no tails } jmp(main_loop_label, T_NEAR); @@ -552,13 +615,28 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi cmp(reg_work_amount, step); jl(blk_tail_loop_end_label, T_NEAR); - // use xmm for 4s in tails + // load to xmm with 4s in tails, process on vmm load_xmm(xmm_valTL, ptr[reg_src], jcp_.src_dt); load_xmm(xmm_valTR, ptr[reg_src_aux], jcp_.src_dt); - load_xmm(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); - load_xmm(xmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); - - linear_onnx_worker(); + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + load_xmm(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); + load_xmm(xmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // temporally save front result to temp_vmm + load_xmm(xmm_valTL, ptr[reg_src_aux4], jcp_.src_dt); + load_xmm(xmm_valTR, ptr[reg_src_aux5], jcp_.src_dt); + load_xmm(xmm_valBL, ptr[reg_src_aux6], jcp_.src_dt); + load_xmm(xmm_valBR, ptr[reg_src_aux7], jcp_.src_dt); + linear_onnx_worker_2d(); + + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, vmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, false); // vmm_val is vmm_valTR @@ -569,8 +647,16 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_dst, step * jcp_.dst_data_size); add(reg_src, step * jcp_.src_data_size); add(reg_src_aux, step * jcp_.src_data_size); - add(reg_src_aux1, step * jcp_.src_data_size); - add(reg_src_aux2, step * jcp_.src_data_size); + if (jcp_.spatial_dim_size > 1) { + add(reg_src_aux1, step * jcp_.src_data_size); + add(reg_src_aux2, step * jcp_.src_data_size); + } + if (jcp_.spatial_dim_size > 2) { + add(reg_src_aux4, step * jcp_.src_data_size); + add(reg_src_aux5, step * jcp_.src_data_size); + add(reg_src_aux6, step * jcp_.src_data_size); + add(reg_src_aux7, step * jcp_.src_data_size); + } sub(reg_work_amount, step); jmp(blk_tail_loop_label, T_NEAR); @@ -583,13 +669,28 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi cmp(reg_work_amount, 1); jl(tail_loop_end_label, T_NEAR); - // still use xmm on avx2/avx512 + // load on xmm, process on vmm load_scalar(xmm_valTL, ptr[reg_src], jcp_.src_dt); load_scalar(xmm_valTR, ptr[reg_src_aux], jcp_.src_dt); - load_scalar(xmm_valBL, ptr[reg_src_aux1], 
jcp_.src_dt); - load_scalar(xmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); - - linear_onnx_worker(); + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + load_scalar(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); + load_scalar(xmm_valBR, ptr[reg_src_aux2], jcp_.src_dt); + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // temporally save front result to temp_vmm + load_scalar(xmm_valTL, ptr[reg_src_aux4], jcp_.src_dt); + load_scalar(xmm_valTR, ptr[reg_src_aux5], jcp_.src_dt); + load_scalar(xmm_valBL, ptr[reg_src_aux6], jcp_.src_dt); + load_scalar(xmm_valBR, ptr[reg_src_aux7], jcp_.src_dt); + linear_onnx_worker_2d(); + + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, vmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, false); // vmm_val is vmm_valTR @@ -600,8 +701,16 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_dst, step * jcp_.dst_data_size); add(reg_src, step * jcp_.src_data_size); add(reg_src_aux, step * jcp_.src_data_size); - add(reg_src_aux1, step * jcp_.src_data_size); - add(reg_src_aux2, step * jcp_.src_data_size); + if (jcp_.spatial_dim_size > 1) { + add(reg_src_aux1, step * jcp_.src_data_size); + add(reg_src_aux2, step * jcp_.src_data_size); + } + if (jcp_.spatial_dim_size > 2) { + add(reg_src_aux4, step * jcp_.src_data_size); + add(reg_src_aux5, step * jcp_.src_data_size); + add(reg_src_aux6, step * jcp_.src_data_size); + add(reg_src_aux7, step * jcp_.src_data_size); + } sub(reg_work_amount, step); jmp(tail_loop_label, T_NEAR); @@ -617,8 +726,8 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); int step = vlen / sizeof(float); - int index_stride = jcp_.OW * jcp_.OH * jcp_.indices_size; - int weight_stride = jcp_.OW * jcp_.OH * sizeof(float); + int index_stride = jcp_.OW * jcp_.OH * jcp_.OD * jcp_.indices_size; + int weight_stride = jcp_.OW * jcp_.OH * jcp_.OD * sizeof(float); Xbyak::Label main_loop_label; Xbyak::Label main_loop_end_label; @@ -637,21 +746,55 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); vgatherdps(vmm_valTR, ptr[reg_src + vmm_index], vmm_mask); - uni_vmovdqu(vmm_index, ptr[reg_index + 2 * index_stride]); - uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); - vgatherdps(vmm_valBL, ptr[reg_src + vmm_index], vmm_mask); - - uni_vmovdqu(vmm_index, ptr[reg_index + 3 * index_stride]); - uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); - vgatherdps(vmm_valBR, ptr[reg_src + vmm_index], vmm_mask); - - // reg_src_aux point to weight load_vector(vmm_weightL, ptr[reg_src_aux], memory::data_type::f32); load_vector(vmm_weightR, ptr[reg_src_aux + weight_stride], memory::data_type::f32); - load_vector(vmm_weightT, ptr[reg_src_aux + 2 * weight_stride], memory::data_type::f32); - load_vector(vmm_weightB, ptr[reg_src_aux + 3 * weight_stride], memory::data_type::f32); - linear_onnx_worker(); + // progressive manner + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + uni_vmovdqu(vmm_index, ptr[reg_index + 2 * index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valBL, ptr[reg_src + vmm_index], vmm_mask); + + uni_vmovdqu(vmm_index, ptr[reg_index + 3 * 
index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valBR, ptr[reg_src + vmm_index], vmm_mask); + + load_vector(vmm_weightT, ptr[reg_src_aux + 2 * weight_stride], memory::data_type::f32); + load_vector(vmm_weightB, ptr[reg_src_aux + 3 * weight_stride], memory::data_type::f32); + + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // temporally save front result to temp_vmm + + // for end depth + uni_vmovdqu(vmm_index, ptr[reg_index + 4 * index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valTL, ptr[reg_src + vmm_index], vmm_mask); + + uni_vmovdqu(vmm_index, ptr[reg_index + 5 * index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valTR, ptr[reg_src + vmm_index], vmm_mask); + + uni_vmovdqu(vmm_index, ptr[reg_index + 6 * index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valBL, ptr[reg_src + vmm_index], vmm_mask); + + uni_vmovdqu(vmm_index, ptr[reg_index + 7 * index_stride]); + uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask); + vgatherdps(vmm_valBR, ptr[reg_src + vmm_index], vmm_mask); + + linear_onnx_worker_2d(); + + load_vector(vmm_weightE, ptr[reg_src_aux + 5 * weight_stride], memory::data_type::f32); + load_vector(vmm_weightF, ptr[reg_src_aux + 4 * weight_stride], memory::data_type::f32); + + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, vmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, true); // vmm_val is vmm_valTR, broadcase is true @@ -673,7 +816,7 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi cmp(reg_work_amount, 1); jl(tail_loop_end_label, T_NEAR); - // still use xmm on avx2/avx512 + // load to xmm, process on ymm/zmm mov(reg_src_aux1, reg_src); mov(reg_index_offset, dword[reg_index]); add(reg_src_aux1, reg_index_offset); @@ -684,22 +827,60 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi add(reg_src_aux1, reg_index_offset); load_scalar(xmm_valTR, ptr[reg_src_aux1], jcp_.src_dt); - mov(reg_src_aux1, reg_src); - mov(reg_index_offset, dword[reg_index + 2 * index_stride]); - add(reg_src_aux1, reg_index_offset); - load_scalar(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); - - mov(reg_src_aux1, reg_src); - mov(reg_index_offset, dword[reg_index + 3 * index_stride]); - add(reg_src_aux1, reg_index_offset); - load_scalar(xmm_valBR, ptr[reg_src_aux1], jcp_.src_dt); - load_scalar(xmm_weightL, ptr[reg_src_aux], memory::data_type::f32); load_scalar(xmm_weightR, ptr[reg_src_aux + weight_stride], memory::data_type::f32); - load_scalar(xmm_weightT, ptr[reg_src_aux + 2 * weight_stride], memory::data_type::f32); - load_scalar(xmm_weightB, ptr[reg_src_aux + 3 * weight_stride], memory::data_type::f32); - linear_onnx_worker(); + if (jcp_.spatial_dim_size == 1) { + linear_onnx_worker_1d(); + } + if (jcp_.spatial_dim_size > 1) { + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 2 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); + + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 3 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valBR, ptr[reg_src_aux1], jcp_.src_dt); + + load_scalar(xmm_weightT, ptr[reg_src_aux + 2 * weight_stride], memory::data_type::f32); + load_scalar(xmm_weightB, ptr[reg_src_aux 
+ 3 * weight_stride], memory::data_type::f32); + + linear_onnx_worker_2d(); + } + if (jcp_.spatial_dim_size > 2) { + uni_vmovups(vmm_d_bias, vmm_valTR); // save from front result to temp_vmm + + // for end depth + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 4 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valTL, ptr[reg_src_aux1], jcp_.src_dt); + + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 5 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valTR, ptr[reg_src_aux1], jcp_.src_dt); + + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 6 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valBL, ptr[reg_src_aux1], jcp_.src_dt); + + mov(reg_src_aux1, reg_src); + mov(reg_index_offset, dword[reg_index + 7 * index_stride]); + add(reg_src_aux1, reg_index_offset); + load_scalar(xmm_valBR, ptr[reg_src_aux1], jcp_.src_dt); + + linear_onnx_worker_2d(); + + load_scalar(xmm_weightE, ptr[reg_src_aux + 5 * weight_stride], memory::data_type::f32); + load_scalar(xmm_weightF, ptr[reg_src_aux + 4 * weight_stride], memory::data_type::f32); + + uni_vmulps(vmm_valTR, vmm_valTR, xmm_weightE); // end_value * end_weight + uni_vfmadd231ps(vmm_valTR, vmm_d_bias, xmm_weightF); // start_value * start_weight + end_value * end_weight + } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, true); // process on vmm_val, vmm_val is vmm_valTR, and bc @@ -716,9 +897,14 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi L(tail_loop_end_label); } + inline void linear_onnx_worker_1d() { + uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightR); + uni_vfmadd231ps(vmm_valTR, vmm_valTL, vmm_weightL); + } + // weightT * (srcTL * weightL + srcTR * weightR) + // weightB * (srcBL * weightL + srcBR * weightR) - inline void linear_onnx_worker() { + inline void linear_onnx_worker_2d() { uni_vmulps(vmm_valTR, vmm_valTR, vmm_weightR); uni_vmulps(vmm_valBR, vmm_valBR, vmm_weightR); uni_vfmadd231ps(vmm_valTR, vmm_valTL, vmm_weightL); @@ -1420,19 +1606,27 @@ SizeVector getBlockND(SizeVector& shape) { } return blockND; } - +// w/hw/ncw/nchw/ncdhw to ncdhw SizeVector to5Dim(SizeVector casesDim) { size_t caseSize = casesDim.size(); SizeVector dim5(5, 1lu); + dim5[4] = casesDim[caseSize - 1]; + if (caseSize > 1) { + dim5[3] = casesDim[caseSize - 2]; + } if (caseSize > 2) { dim5[0] = casesDim[0]; + } + if (caseSize > 3) { dim5[1] = casesDim[1]; } - if (caseSize == 5) { + if (caseSize > 4) { dim5[2] = casesDim[2]; } - dim5[3] = casesDim[caseSize - 2]; - dim5[4] = casesDim[caseSize - 1]; + if (caseSize == 3) { // nhw -> ncw + dim5[1] = dim5[3]; + dim5[3] = 1lu; + } return dim5; } @@ -1462,13 +1656,27 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); int dataRank = srcDim.size(); - if (dataRank != 2 && dataRank != 4 && dataRank != 5) { - THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << - "' does not support input tensor of rank :" << dataRank; - } - if ((mode == InterpolateMode::cubic || mode == InterpolateMode::linear_onnx) && (dataRank == 5)) { - THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << - "' of 'linear_onnx' or 'cubic' mode only support input tensor of 2 or 4 rank"; + switch (dataRank) { + case 1: + case 3: + spatialDimSize = 1; + break; + case 2: + case 4: + spatialDimSize = 2; + break; + case 5: + if (mode != InterpolateMode::cubic) { + 
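The linear_onnx_worker_1d / linear_onnx_worker_2d helpers introduced above, together with the vmm_weightF / vmm_weightE blend in the spatial_dim_size > 2 branches, build the result progressively: a 1-D lerp, a bilinear value per depth plane, and a final front/end blend. A scalar restatement (function names here are illustrative only, not part of the patch):

    // 1-D: left/right samples with their weights
    float lerp1d(float l, float r, float wL, float wR) { return l * wL + r * wR; }
    // 2-D: weightT * (TL*wL + TR*wR) + weightB * (BL*wL + BR*wR), as in the comment above
    float lerp2d(float tl, float tr, float bl, float br,
                 float wL, float wR, float wT, float wB) {
        return wT * lerp1d(tl, tr, wL, wR) + wB * lerp1d(bl, br, wL, wR);
    }
    // 3-D: front2d and end2d are lerp2d over the Front* and End* corner samples
    float lerp3d(float front2d, float end2d, float wF, float wE) { return front2d * wF + end2d * wE; }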
spatialDimSize = 3; + } else { + THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << + "' of 'cubic' mode only support input tensor of 2 or 4 rank"; + } + break; + default: + THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << + "' does not support input tensor of rank :" << dataRank; + break; } modeString = layer->GetParamAsString("coordinate_transformation_mode", "half_pixel"); @@ -1611,6 +1819,10 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { } } + if (!mayiuse(cpu::x64::sse41)) { + inputPrecision = outputPrecision = Precision::FP32; + } + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); srcDataSize = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); @@ -1655,49 +1867,45 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { }; auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? getParentEdgeAt(DATA_ID)->getDims()[1] : 1; - if (mode != InterpolateMode::linear) { - // blk and by_channel JIT kernel on sse42 or above machine - if (mayiuse(cpu::x64::sse41)) { - if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nhwc, jit_avx512); - if (channels != 1) - pushDesc(memory::format_tag::nChw16c, jit_avx512); - } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::nhwc, jit_avx2); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_avx2); - } else { - pushDesc(memory::format_tag::nhwc, jit_sse42); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_sse42); - } - } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode == InterpolateMode::nearest) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::ndhwc, jit_avx512); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw16c, jit_avx512); - } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::ndhwc, jit_avx2); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_avx2); - } else { - pushDesc(memory::format_tag::ndhwc, jit_sse42); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_sse42); - } + + if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { + pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); + } else { + // blk and by_channel JIT kernel on sse41 or above machine + if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { + if (mayiuse(cpu::x64::avx512_common)) { + pushDesc(memory::format_tag::nhwc, jit_avx512); + if (channels != 1) + pushDesc(memory::format_tag::nChw16c, jit_avx512); + } else if (mayiuse(cpu::x64::avx2)) { + pushDesc(memory::format_tag::nhwc, jit_avx2); + if (channels != 1) + pushDesc(memory::format_tag::nChw8c, jit_avx2); + } else { + pushDesc(memory::format_tag::nhwc, jit_sse42); + if (channels != 1) + pushDesc(memory::format_tag::nChw8c, jit_sse42); + } + } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { + if (mayiuse(cpu::x64::avx512_common)) { + pushDesc(memory::format_tag::ndhwc, jit_avx512); + if (channels != 1) + pushDesc(memory::format_tag::nCdhw16c, jit_avx512); + } else if (mayiuse(cpu::x64::avx2)) { + pushDesc(memory::format_tag::ndhwc, jit_avx2); + if (channels != 1) + pushDesc(memory::format_tag::nCdhw8c, jit_avx2); + } else { + pushDesc(memory::format_tag::ndhwc, jit_sse42); + if (channels != 1) + 
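Taken together, to5Dim and the rank switch above normalize every supported input to a 5-D N,C,D,H,W view and record how many trailing spatial axes take part in the resize; the mappings implied by that code are:

    rank 1  {W}              -> {1, 1, 1, 1, W}    spatialDimSize = 1
    rank 2  {H, W}           -> {1, 1, 1, H, W}    spatialDimSize = 2
    rank 3  {N, C, W}        -> {N, C, 1, 1, W}    spatialDimSize = 1
    rank 4  {N, C, H, W}     -> {N, C, 1, H, W}    spatialDimSize = 2
    rank 5  {N, C, D, H, W}  -> unchanged           spatialDimSize = 3  (cubic mode still rejects rank 5)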
pushDesc(memory::format_tag::nCdhw8c, jit_sse42); } } - // planar for 1.ref on machine without sse42(if no sse42, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) - if (!mayiuse(cpu::x64::sse41)) - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); - + // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); } - } else { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); } } @@ -1731,10 +1939,15 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.indices_size = sizeof(int); size_t dimSize = dstDim.size(); - jcp.OW = dstDim[dimSize - 1]; - jcp.OH = dstDim[dimSize - 2]; - jcp.IW = srcDimPad[dimSize - 1]; - jcp.IH = srcDimPad[dimSize - 2]; + auto srcDimPad5d = to5Dim(srcDimPad); + auto dstDim5d = to5Dim(dstDim); + jcp.OW = dstDim5d[4]; + jcp.OH = dstDim5d[3]; + jcp.OD = dstDim5d[2]; + jcp.IW = srcDimPad5d[4]; + jcp.IH = srcDimPad5d[3]; + jcp.ID = srcDimPad5d[2]; + jcp.spatial_dim_size = spatialDimSize; if (getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { jcp.layout = InterpolateLayoutType::planar; @@ -1770,8 +1983,6 @@ void MKLDNNInterpolateNode::createPrimitive() { if (dimSize > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) { THROW_IE_EXCEPTION << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)"; } - auto srcDimPad5d = to5Dim(srcDimPad); - auto dstDim5d = to5Dim(dstDim); switch (mode) { case InterpolateMode::nearest: { @@ -1833,106 +2044,125 @@ void MKLDNNInterpolateNode::buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstD } } +void MKLDNNInterpolateNode::linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1) { + float inCoord = coordTransToInput(outCoord, scale, inShape, outShape); + inCoord = std::max(0.0f, std::min(inCoord, static_cast(inShape - 1))); + index0 = std::min(static_cast(inCoord), inShape - 1); + index1 = std::min(index0 + 1, inShape - 1); + + weight1 = std::fabs(inCoord - index0); + weight0 = std::fabs(inCoord - index1); + if (index0 == index1) { + weight0 = 0.5f; + weight1 = 0.5f; + } +} + void MKLDNNInterpolateNode::buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, InterpolateLayoutType layout) { int dimSize = srcDim.size(); - float fy = dataScales[dimSize - 2]; + float fz = (spatialDimSize > 2) ? dataScales[dimSize - 3] : 1.f; + float fy = (spatialDimSize > 1) ? dataScales[dimSize - 2] : 1.f; float fx = dataScales[dimSize - 1]; - int IH = srcDimPad5d[3], IW = srcDimPad5d[4]; - int OH = dstDim5d[3], OW = dstDim5d[4]; + int ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; + int OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; + + std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); + std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); if (layout == InterpolateLayoutType::planar) { - int eltInGrid = 4; + // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, + // EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 + // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 + int eltInGrid = (spatialDimSize > 2) ? MAX_INPUT_INTERPOLATE : ((spatialDimSize > 1) ? 
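linearOnnxCF above is the scalar helper that fills these index/weight tables: for one output coordinate it produces two clamped source indices and two complementary weights. A standalone restatement (coordTransToInput depends on the configured coordinate_transformation_mode; a plain half_pixel mapping is assumed here purely for illustration):

    #include <algorithm>
    #include <cmath>
    void linear_onnx_cf_sketch(int outCoord, float scale, int inShape, int outShape,
                               int& index0, int& index1, float& weight0, float& weight1) {
        float inCoord = (outCoord + 0.5f) / scale - 0.5f;                        // assumed half_pixel transform
        inCoord = std::max(0.0f, std::min(inCoord, static_cast<float>(inShape - 1)));
        index0  = std::min(static_cast<int>(inCoord), inShape - 1);              // left/top/front sample
        index1  = std::min(index0 + 1, inShape - 1);                             // right/bottom/end sample
        weight1 = std::fabs(inCoord - index0);                                   // weight toward index1
        weight0 = std::fabs(inCoord - index1);                                   // weight toward index0
        if (index0 == index1) { weight0 = weight1 = 0.5f; }                      // collapsed interval
    }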
4 : 2); int idxType = 2; - int scratchLen = rnd_up(eltInGrid * OW * OH, 16); + int scratchLen = rnd_up(eltInGrid * OW * OH * OD, 16); indexTable.resize(idxType * scratchLen); - int *indexTopLeft = static_cast(&indexTable[0]); - int *indexTopRight = static_cast(&indexTable[OW * OH]); - int *indexBottomLeft = static_cast(&indexTable[2 * OW * OH]); - int *indexBottomRight = static_cast(&indexTable[3 * OW * OH]); - - float *weightLeft = reinterpret_cast(&indexTable[scratchLen]); - float *weightRight = reinterpret_cast(&indexTable[scratchLen + OW * OH]); - float *weightTop = reinterpret_cast(&indexTable[scratchLen + 2 * OW * OH]); - float *weightBottom = reinterpret_cast(&indexTable[scratchLen + 3 * OW * OH]); - - for (int oy = 0; oy < OH; oy++) { - float iy = coordTransToInput(oy, fy, IH, OH); - iy = std::max(0.0f, std::min(iy, static_cast(IH - 1))); - int iyT = std::min(static_cast(iy), IH - 1); - int iyB = std::min(iyT + 1, IH - 1); + indexPtr[0] = static_cast(&indexTable[0]); + indexPtr[1] = static_cast(&indexTable[OW * OH * OD]); + weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); + weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + OW * OH * OD]); + if (spatialDimSize > 1) { + indexPtr[2] = static_cast(&indexTable[2 * OW * OH * OD]); + indexPtr[3] = static_cast(&indexTable[3 * OW * OH * OD]); + weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW * OH * OD]); + weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 3 * OW * OH * OD]); + } + if (spatialDimSize > 2) { + indexPtr[4] = static_cast(&indexTable[4 * OW * OH * OD]); + indexPtr[5] = static_cast(&indexTable[5 * OW * OH * OD]); + indexPtr[6] = static_cast(&indexTable[6 * OW * OH * OD]); + indexPtr[7] = static_cast(&indexTable[7 * OW * OH * OD]); + weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 4 * OW * OH * OD]); + weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 5 * OW * OH * OD]); + } + int scale = mayiuse(cpu::x64::sse41) ? 
srcDataSize : 1; - float weightB = std::fabs(iy - iyT); - float weightT = std::fabs(iy - iyB); - if (iyT == iyB) { - weightB = 0.5f; - weightT = 0.5f; - } - int idxOy = oy * OW; - for (int ox = 0; ox < OW; ox++) { - float ix = coordTransToInput(ox, fx, IW, OW); - ix = std::max(0.0f, std::min(ix, static_cast(IW - 1))); - int ixL = std::min(static_cast(ix), IW - 1); - int ixR = std::min(ixL + 1, IW - 1); - - float weightR = std::fabs(ix - ixL); - float weightL = std::fabs(ix - ixR); - if (ixL == ixR) { - weightR = 0.5f; - weightL = 0.5f; + for (int oz = 0; oz < OD; oz++) { + int izF, izE; + float weightF, weightE; + linearOnnxCF(oz, fz, ID, OD, izF, izE, weightF, weightE); + int idxOz = oz * OH * OW; + for (int oy = 0; oy < OH; oy++) { + int iyT, iyB; + float weightT, weightB; + linearOnnxCF(oy, fy, IH, OH, iyT, iyB, weightT, weightB); + int idxOzOy = idxOz + oy * OW; + for (int ox = 0; ox < OW; ox++) { + int ixL, ixR; + float weightL, weightR; + linearOnnxCF(ox, fx, IW, OW, ixL, ixR, weightL, weightR); + + int idxOzOyOx = idxOzOy + ox; + indexPtr[0][idxOzOyOx] = (izF * IH * IW + iyT * IW + ixL) * scale; + indexPtr[1][idxOzOyOx] = (izF * IH * IW + iyT * IW + ixR) * scale; + weightPtr[0][idxOzOyOx] = weightL; + weightPtr[1][idxOzOyOx] = weightR; + if (spatialDimSize > 1) { + indexPtr[2][idxOzOyOx] = (izF * IH * IW + iyB * IW + ixL) * scale; + indexPtr[3][idxOzOyOx] = (izF * IH * IW + iyB * IW + ixR) * scale; + weightPtr[2][idxOzOyOx] = weightT; + weightPtr[3][idxOzOyOx] = weightB; + } + if (spatialDimSize > 2) { + indexPtr[4][idxOzOyOx] = (izE * IH * IW + iyT * IW + ixL) * scale; + indexPtr[5][idxOzOyOx] = (izE * IH * IW + iyT * IW + ixR) * scale; + indexPtr[6][idxOzOyOx] = (izE * IH * IW + iyB * IW + ixL) * scale; + indexPtr[7][idxOzOyOx] = (izE * IH * IW + iyB * IW + ixR) * scale; + weightPtr[4][idxOzOyOx] = weightF; + weightPtr[5][idxOzOyOx] = weightE; + } } - int idxOyOx = idxOy + ox; - indexTopLeft[idxOyOx] = (iyT * IW + ixL) * srcDataSize; - indexTopRight[idxOyOx] = (iyT * IW + ixR) * srcDataSize; - indexBottomLeft[idxOyOx] = (iyB * IW + ixL) * srcDataSize; - indexBottomRight[idxOyOx] = (iyB * IW + ixR) * srcDataSize; - weightLeft[idxOyOx] = weightL; - weightRight[idxOyOx] = weightR; - weightTop[idxOyOx] = weightT; - weightBottom[idxOyOx] = weightB; } } } else { - // left:OW right:OW Top:OH Bottom:OH - size_t scratchLen = rnd_up(OW + OW + OH + OH, 16); + // index: left:OW right:OW Top:OH Bottom:OH, Front:OD, End:OD + // weight:same as index + size_t scratchLen = rnd_up(OW + OW + OH + OH + OD + OD, 16); int idxType = 2; indexTable.resize(idxType * scratchLen); - int *indexLeft = static_cast(&indexTable[0]); - int *indexRight = static_cast(&indexTable[OW]); - int *indexTop = static_cast(&indexTable[2 * OW]); - int *indexBottom = static_cast(&indexTable[2 * OW + OH]); - - float *weightLeft = reinterpret_cast(&indexTable[scratchLen]); - float *weightRight = reinterpret_cast(&indexTable[scratchLen + OW]); - float *weightTop = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); - float *weightBottom = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); + indexPtr[0] = static_cast(&indexTable[0]); + indexPtr[1] = static_cast(&indexTable[OW]); + indexPtr[2] = static_cast(&indexTable[2 * OW]); + indexPtr[3] = static_cast(&indexTable[2 * OW + OH]); + indexPtr[4] = static_cast(&indexTable[2 * OW + 2 * OH]); + indexPtr[5] = static_cast(&indexTable[2 * OW + 2 * OH + OD]); + + weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); + weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + 
OW]); + weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); + weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); + weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH]); + weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH + OD]); for (int ox = 0; ox < OW; ox++) { - float ix = coordTransToInput(ox, fx, IW, OW); - ix = std::max(0.0f, std::min(ix, static_cast(IW - 1))); - indexLeft[ox] = std::min(static_cast(ix), IW - 1); - indexRight[ox] = std::min(indexLeft[ox] + 1, IW - 1); - - weightRight[ox] = std::fabs(ix - indexLeft[ox]); - weightLeft[ox] = std::fabs(ix - indexRight[ox]); - if (indexLeft[ox] == indexRight[ox]) { - weightRight[ox] = 0.5f; - weightLeft[ox] = 0.5f; - } + linearOnnxCF(ox, fx, IW, OW, indexPtr[0][ox], indexPtr[1][ox], weightPtr[0][ox], weightPtr[1][ox]); } - for (int oy = 0; oy < OH; oy++) { - float iy = coordTransToInput(oy, fy, IH, OH); - iy = std::max(0.0f, std::min(iy, static_cast(IH - 1))); - indexTop[oy] = std::min(static_cast(iy), IH - 1); - indexBottom[oy] = std::min(indexTop[oy] + 1, IH - 1); - - weightBottom[oy] = std::fabs(iy - indexTop[oy]); - weightTop[oy] = std::fabs(iy - indexBottom[oy]); - if (indexTop[oy] == indexBottom[oy]) { - weightBottom[oy] = 0.5f; - weightTop[oy] = 0.5f; - } + linearOnnxCF(oy, fy, IH, OH, indexPtr[2][oy], indexPtr[3][oy], weightPtr[2][oy], weightPtr[3][oy]); + } + for (int oz = 0; oz < OD; oz++) { + linearOnnxCF(oz, fz, ID, OD, indexPtr[4][oz], indexPtr[5][oz], weightPtr[4][oz], weightPtr[5][oz]); } } } @@ -2246,12 +2476,12 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) { case InterpolateMode::linear_onnx: { if (interpolateKernel) { if (configured_for_layout == InterpolateLayoutType::planar) { - linearOnnxPlanar(src_data, dst_data, N, C, IH, IW, OH, OW); + linearOnnxPlanar(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); } else { - linearOnnxCGathered(src_data, dst_data, N, C, IH, IW, OH, OW); + linearOnnxCGathered(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); } } else { - linearOnnxRef(src_data, dst_data, N, C, IH, IW, OH, OW); + linearOnnxRef(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); } break; } @@ -2372,77 +2602,107 @@ void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int int *index_h = static_cast(&indexTable[OD]); int *index_w = static_cast(&indexTable[OD + OH]); + const float *in_ptr_f32 = reinterpret_cast(in_ptr_); + float *out_ptr_f32 = reinterpret_cast(out_ptr_); + parallel_for3d(B, C, OD, [&](size_t b, size_t c, size_t od) { - const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * C * b + IW * IH * ID * c + IW * IH * index_d[od]) * srcDataSize; - uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * C * b + OW * OH * OD * c + OW * OH * od) * dstDataSize; + const float *in_ptr = in_ptr_f32 + (IW * IH * ID * C * b + IW * IH * ID * c + IW * IH * index_d[od]); + float *out_ptr = out_ptr_f32 + (OW * OH * OD * C * b + OW * OH * OD * c + OW * OH * od); for (int oh = 0; oh < OH; oh++) { - const uint8_t *in_ptr_h = in_ptr + (IW * index_h[oh]) * srcDataSize; - uint8_t *out_ptr_h = out_ptr + (OW * oh) * dstDataSize; + const float *in_ptr_h = in_ptr + (IW * index_h[oh]); + float *out_ptr_h = out_ptr + (OW * oh); for (int ow = 0; ow < OW; ow++) { - float dstValue = getValue(in_ptr_h, index_w[ow] * srcDataSize, inputPrec); - setValue(out_ptr_h, ow * dstDataSize, dstValue, outputPrec); + out_ptr_h[ow] = in_ptr_h[index_w[ow]]; } } }); } -void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t 
*out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { +void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { + // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 + // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 int *index = static_cast(&indexTable[0]); - int eltInGrid = 4; - int scratchLen = rnd_up(eltInGrid * OW * OH, 16); + int eltInGrid = (spatialDimSize > 2) ? MAX_INPUT_INTERPOLATE : ((spatialDimSize > 1) ? 4 : 2); + int scratchLen = rnd_up(eltInGrid * OW * OH * OD, 16); float *weight = reinterpret_cast(&indexTable[scratchLen]); parallel_for2d(B, C, [&](size_t b, size_t c) { - uint8_t *out_ptr_nc = out_ptr_ + (OH * OW * C * b + OH * OW * c) * dstDataSize; - const uint8_t *in_ptr_nc = in_ptr_ + (IH * IW * C * b + IH * IW * c) * srcDataSize; + uint8_t *out_ptr_nc = out_ptr_ + (OH * OW * OD * C * b + OH * OW * OD * c) * dstDataSize; + const uint8_t *in_ptr_nc = in_ptr_ + (IH * IW * ID * C * b + IH * IW * ID * c) * srcDataSize; auto arg = jit_interpolate_call_args(); arg.src_ptr[0] = in_ptr_nc; arg.index = static_cast(&index[0]); arg.weight_ptr[0] = static_cast(&weight[0]); arg.dst = out_ptr_nc; - arg.work_amount = OW * OH; + arg.work_amount = OW * OH * OD; arg.oc_off = static_cast(c * sizeof(float)); (*interpolateKernel)(&arg); }); } -void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { - // left:OW right:OW Top:OH Bottom:OH - size_t scratchLen = rnd_up(OW + OW + OH + OH, 16); - int *indexLeft = static_cast(&indexTable[0]); - int *indexRight = static_cast(&indexTable[OW]); - int *indexTop = static_cast(&indexTable[2 * OW]); - int *indexBottom = static_cast(&indexTable[2 * OW + OH]); - - float *weightLeft = reinterpret_cast(&indexTable[scratchLen]); - float *weightRight = reinterpret_cast(&indexTable[scratchLen + OW]); - float *weightTop = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); - float *weightBottom = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); +void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { + // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD + std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); + std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); + size_t scratchLen = rnd_up(OW + OW + OH + OH + OD + OD, 16); + indexPtr[0] = static_cast(&indexTable[0]); + indexPtr[1] = static_cast(&indexTable[OW]); + indexPtr[2] = static_cast(&indexTable[2 * OW]); + indexPtr[3] = static_cast(&indexTable[2 * OW + OH]); + indexPtr[4] = static_cast(&indexTable[2 * OW + 2 * OH]); + indexPtr[5] = static_cast(&indexTable[2 * OW + 2 * OH + OD]); + + weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); + weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + OW]); + weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); + weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); + weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH]); + weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH + OD]); bool isByChannel = (configured_for_layout == by_channel) ? true : false; int blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; - int CB = div_up(C, blkSize); - int CSize = isByChannel ? C : blkSize * CB; + int CB = isByChannel ? 
1 : div_up(C, blkSize); int CGatherLen = isByChannel ? C : blkSize; int workAmount = isByChannel ? C : CB; - parallel_for2d(B, OH, [&](size_t b, size_t h) { - uint8_t *out_ptr_nh = out_ptr_ + (OH * OW * CSize * b + OW * CGatherLen * h) * dstDataSize; - const uint8_t *in_ptr_n = in_ptr_ + (IH * IW * CSize * b) * srcDataSize; - const uint8_t *in_ptr_nh_t = in_ptr_n + (indexTop[h] * IW * CGatherLen) * srcDataSize; - const uint8_t *in_ptr_nh_b = in_ptr_n + (indexBottom[h] * IW * CGatherLen) * srcDataSize; + // n_CB(1)_d_h_w_8[16](c), () for by-channel + int C0 = OW * CGatherLen; + int C1 = OH * C0; + int C2 = OD * C1; + int C3 = CB * C2; + int I0 = IW * CGatherLen; + int I1 = IH * I0; + int I2 = ID * I1; + int I3 = CB * I2; + parallel_for3d(B, OD, OH, [&](size_t b, size_t d, size_t h) { + uint8_t *out_ptr_ndh = out_ptr_ + (C3 * b + C1 * d + C0 * h) * dstDataSize; + + const uint8_t *in_ptr_n = in_ptr_ + (I3 * b) * srcDataSize; + const uint8_t *in_ptr_nf = in_ptr_n + (indexPtr[4][d] * I1) * srcDataSize; + const uint8_t *in_ptr_nft = in_ptr_nf + (indexPtr[2][h] * I0) * srcDataSize; + const uint8_t *in_ptr_nfb = in_ptr_nf + (indexPtr[3][h] * I0) * srcDataSize; + const uint8_t *in_ptr_ne = in_ptr_n + (indexPtr[5][d] * I1) * srcDataSize; + const uint8_t *in_ptr_net = in_ptr_ne + (indexPtr[2][h] * I0) * srcDataSize; + const uint8_t *in_ptr_neb = in_ptr_ne + (indexPtr[3][h] * I0) * srcDataSize; auto arg = jit_interpolate_call_args(); for (int w = 0; w < OW; ++w) { - uint8_t *out_ptr_nhw = out_ptr_nh + CGatherLen * w * dstDataSize; - arg.src_ptr[0] = in_ptr_nh_t + (indexLeft[w] * CGatherLen) * srcDataSize; - arg.src_ptr[1] = in_ptr_nh_t + (indexRight[w] * CGatherLen) * srcDataSize; - arg.src_ptr[2] = in_ptr_nh_b + (indexLeft[w] * CGatherLen) * srcDataSize; - arg.src_ptr[3] = in_ptr_nh_b + (indexRight[w] * CGatherLen) * srcDataSize; - arg.weight_ptr[0] = static_cast(&weightLeft[w]); - arg.weight_ptr[1] = static_cast(&weightRight[w]); - arg.weight_ptr[2] = static_cast(&weightTop[h]); - arg.weight_ptr[3] = static_cast(&weightBottom[h]); - arg.dst = out_ptr_nhw; + uint8_t *out_ptr_ndhw = out_ptr_ndh + CGatherLen * w * dstDataSize; + + arg.src_ptr[0] = in_ptr_nft + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[1] = in_ptr_nft + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[2] = in_ptr_nfb + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[3] = in_ptr_nfb + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[4] = in_ptr_net + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[5] = in_ptr_net + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[6] = in_ptr_neb + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[7] = in_ptr_neb + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.weight_ptr[0] = static_cast(&weightPtr[0][w]); + arg.weight_ptr[1] = static_cast(&weightPtr[1][w]); + arg.weight_ptr[2] = static_cast(&weightPtr[2][h]); + arg.weight_ptr[3] = static_cast(&weightPtr[3][h]); + arg.weight_ptr[4] = static_cast(&weightPtr[4][d]); + arg.weight_ptr[5] = static_cast(&weightPtr[5][d]); + arg.dst = out_ptr_ndhw; arg.work_amount = workAmount; arg.oc_off = 0; (*interpolateKernel)(&arg); @@ -2450,33 +2710,99 @@ void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t }); } -void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { - int eltInGrid = 4; - int scratchLen = rnd_up(eltInGrid * OW * OH, 16); - - int *indexTopLeft = 
static_cast(&indexTable[0]); - int *indexTopRight = static_cast(&indexTable[OW * OH]); - int *indexBottomLeft = static_cast(&indexTable[2 * OW * OH]); - int *indexBottomRight = static_cast(&indexTable[3 * OW * OH]); - - float *weightLeft = reinterpret_cast(&indexTable[scratchLen]); - float *weightRight = reinterpret_cast(&indexTable[scratchLen + OW * OH]); - float *weightTop = reinterpret_cast(&indexTable[scratchLen + 2 * OW * OH]); - float *weightBottom = reinterpret_cast(&indexTable[scratchLen + 3 * OW * OH]); +void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { + std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); + std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); + // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, + // EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 + // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 + + int eltInGrid = (spatialDimSize > 2) ? MAX_INPUT_INTERPOLATE : ((spatialDimSize > 1) ? 4 : 2); + int scratchLen = rnd_up(eltInGrid * OW * OH * OD, 16); + + indexPtr[0] = static_cast(&indexTable[0]); + indexPtr[1] = static_cast(&indexTable[OW * OH * OD]); + weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); + weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + OW * OH * OD]); + if (spatialDimSize > 1) { + indexPtr[2] = static_cast(&indexTable[2 * OW * OH * OD]); + indexPtr[3] = static_cast(&indexTable[3 * OW * OH * OD]); + weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW * OH * OD]); + weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 3 * OW * OH * OD]); + } + if (spatialDimSize > 2) { + indexPtr[4] = static_cast(&indexTable[4 * OW * OH * OD]); + indexPtr[5] = static_cast(&indexTable[5 * OW * OH * OD]); + indexPtr[6] = static_cast(&indexTable[6 * OW * OH * OD]); + indexPtr[7] = static_cast(&indexTable[7 * OW * OH * OD]); + weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 4 * OW * OH * OD]); + weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 5 * OW * OH * OD]); + } + + const float *in_ptr_f32 = reinterpret_cast(in_ptr_); + float *out_ptr_f32 = reinterpret_cast(out_ptr_); parallel_for2d(B, C, [&](size_t b, size_t c) { - uint8_t *out_ptr_nc = out_ptr_ + (OH * OW * C * b + OH * OW * c) * dstDataSize; - const uint8_t *in_ptr_nc = in_ptr_ + (IH * IW * C * b + IH * IW * c) * srcDataSize; - for (int i = 0; i < OH * OW; i++) { - float srcTL = getValue(in_ptr_nc, indexTopLeft[i], inputPrec); - float srcTR = getValue(in_ptr_nc, indexTopRight[i], inputPrec); - float srcBL = getValue(in_ptr_nc, indexBottomLeft[i], inputPrec); - float srcBR = getValue(in_ptr_nc, indexBottomRight[i], inputPrec); - - float dstValue = srcTL * weightTop[i] * weightLeft[i] + srcTR * weightTop[i] * weightRight[i] + - srcBL * weightBottom[i] * weightLeft[i] + srcBR * weightBottom[i] * weightRight[i]; - - setValue(out_ptr_nc, i * dstDataSize, dstValue, outputPrec); + float *out_ptr_nc = out_ptr_f32 + (OD * OH * OW * C * b + OD * OH * OW * c); + const float *in_ptr_nc = in_ptr_f32 + (ID * IH * IW * C * b + ID * IH * IW * c); + // do not combined 1d/2d to 3d unified process to get rid of invalid computing. 
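
Editor's note: to make the per-rank branches below easier to follow, here is a minimal standalone sketch of the separable linear-ONNX resampling they implement. It assumes the ONNX "half_pixel" coordinate transform with scale = outShape / inShape; the actual node derives coordinates through its configured coordinate transformation mode, and the helper name axisIndexWeight is illustrative only (it mirrors what a helper like linearOnnxCF produces, not its exact signature).

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Per-axis source indices and interpolation weights for one output coordinate.
static void axisIndexWeight(int outCoord, float scale, int inShape,
                            int& i0, int& i1, float& w0, float& w1) {
    // half_pixel mapping from output to input coordinate (assumption).
    float inCoord = (static_cast<float>(outCoord) + 0.5f) / scale - 0.5f;
    inCoord = std::max(0.0f, std::min(inCoord, static_cast<float>(inShape - 1)));
    i0 = std::min(static_cast<int>(inCoord), inShape - 1);   // lower neighbour
    i1 = std::min(i0 + 1, inShape - 1);                      // upper neighbour
    w1 = std::fabs(inCoord - static_cast<float>(i0));        // weight of upper neighbour
    w0 = std::fabs(inCoord - static_cast<float>(i1));        // weight of lower neighbour
    if (i0 == i1) { w0 = w1 = 0.5f; }                        // degenerate (clamped) axis
}

// Trilinear sample of a dense ID x IH x IW volume at output voxel (oz, oy, ox),
// factored the same way as the 3-D branch below.
static float sampleTrilinear(const std::vector<float>& src, int ID, int IH, int IW,
                             int OD, int OH, int OW, int oz, int oy, int ox) {
    int izF, izE, iyT, iyB, ixL, ixR;
    float wF, wE, wT, wB, wL, wR;
    axisIndexWeight(oz, static_cast<float>(OD) / ID, ID, izF, izE, wF, wE);
    axisIndexWeight(oy, static_cast<float>(OH) / IH, IH, iyT, iyB, wT, wB);
    axisIndexWeight(ox, static_cast<float>(OW) / IW, IW, ixL, ixR, wL, wR);
    auto at = [&](int z, int y, int x) { return src[(z * IH + y) * IW + x]; };
    return wF * (wT * (wL * at(izF, iyT, ixL) + wR * at(izF, iyT, ixR)) +
                 wB * (wL * at(izF, iyB, ixL) + wR * at(izF, iyB, ixR))) +
           wE * (wT * (wL * at(izE, iyT, ixL) + wR * at(izE, iyT, ixR)) +
                 wB * (wL * at(izE, iyB, ixL) + wR * at(izE, iyB, ixR)));
}

int main() {
    std::vector<float> src = {0, 1, 2, 3, 4, 5, 6, 7};   // a 2 x 2 x 2 volume
    // Centre voxel of a 3 x 3 x 3 upsample averages all eight sources: prints 3.5
    std::printf("%f\n", sampleTrilinear(src, 2, 2, 2, 3, 3, 3, 1, 1, 1));
    return 0;
}

Factoring the sum this way, as the reference branch below does instead of expanding all eight weight products (see the commented-out expanded form), reuses the per-axis partial sums and roughly halves the multiplications per output element.
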
+ switch (spatialDimSize) { + case 1: + for (int i = 0; i < OW; i++) { + float src0 = in_ptr_nc[indexPtr[0][i]]; + float src1 = in_ptr_nc[indexPtr[1][i]]; + + out_ptr_nc[i] = src0 * weightPtr[0][i] + + src1 * weightPtr[1][i]; + } + break; + case 2: + for (int i = 0; i < OH * OW; i++) { + float src00 = in_ptr_nc[indexPtr[0][i]]; + float src01 = in_ptr_nc[indexPtr[1][i]]; + float src10 = in_ptr_nc[indexPtr[2][i]]; + float src11 = in_ptr_nc[indexPtr[3][i]]; + + out_ptr_nc[i] = src00 * weightPtr[2][i] * weightPtr[0][i] + + src01 * weightPtr[2][i] * weightPtr[1][i] + + src10 * weightPtr[3][i] * weightPtr[0][i] + + src11 * weightPtr[3][i] * weightPtr[1][i]; + } + break; + case 3: + for (int i = 0; i < OD * OH * OW; i++) { + float src000 = in_ptr_nc[indexPtr[0][i]]; + float src001 = in_ptr_nc[indexPtr[1][i]]; + float src010 = in_ptr_nc[indexPtr[2][i]]; + float src011 = in_ptr_nc[indexPtr[3][i]]; + float src100 = in_ptr_nc[indexPtr[4][i]]; + float src101 = in_ptr_nc[indexPtr[5][i]]; + float src110 = in_ptr_nc[indexPtr[6][i]]; + float src111 = in_ptr_nc[indexPtr[7][i]]; + + // float dstValue = + // weightPtr[4][i] * weightPtr[2][i] * weightPtr[0][i] * src000 + + // weightPtr[4][i] * weightPtr[2][i] * weightPtr[1][i] * src001 + + // weightPtr[4][i] * weightPtr[3][i] * weightPtr[0][i] * src010 + + // weightPtr[4][i] * weightPtr[3][i] * weightPtr[1][i] * src011 + + // weightPtr[5][i] * weightPtr[2][i] * weightPtr[0][i] * src100 + + // weightPtr[5][i] * weightPtr[2][i] * weightPtr[1][i] * src101 + + // weightPtr[5][i] * weightPtr[3][i] * weightPtr[0][i] * src110 + + // weightPtr[5][i] * weightPtr[3][i] * weightPtr[1][i] * src111; + + out_ptr_nc[i] = + weightPtr[4][i] * (weightPtr[2][i] * (weightPtr[0][i] * src000 + + weightPtr[1][i] * src001) + + weightPtr[3][i] * (weightPtr[0][i] * src010 + + weightPtr[1][i] * src011)) + + weightPtr[5][i] * (weightPtr[2][i] * (weightPtr[0][i] * src100 + + weightPtr[1][i] * src101) + + weightPtr[3][i] * (weightPtr[0][i] * src110 + + weightPtr[1][i] * src111)); + } + break; + default: + break; } }); } @@ -2683,9 +3009,12 @@ void MKLDNNInterpolateNode::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int *yOrigin = static_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW]); float *yFactor = reinterpret_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW + OH]); + const float *in_ptr_f32 = reinterpret_cast(in_ptr_); + float *out_ptr_f32 = reinterpret_cast(out_ptr_); + parallel_for4d(B, C, OH, OW, [&](size_t n, size_t c, size_t oy, size_t ox) { - const uint8_t *in_ptr_nc = in_ptr_ + (IW * IH * C * n + IW * IH * c) * srcDataSize; - uint8_t *out_ptr_nc = out_ptr_ + (OW * OH * C * n + OW * OH * c) * dstDataSize; + const float *in_ptr_nc = in_ptr_f32 + (IW * IH * C * n + IW * IH * c); + float *out_ptr_nc = out_ptr_f32 + (OW * OH * C * n + OW * OH * c); int iy = yOrigin[oy]; int ix = xOrigin[ox]; @@ -2693,16 +3022,15 @@ void MKLDNNInterpolateNode::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, float retY = 0.f; for (int y = iy - 1, i = 0; y <= iy + 2; y++, i++) { int yInRange = std::max(0, std::min(y, IH - 1)); - const uint8_t *in_ptr_nch = in_ptr_nc + IW * yInRange * srcDataSize; + const float *in_ptr_nch = in_ptr_nc + IW * yInRange; float retX = 0.f; for (int x = ix - 1, j = 0; x <= ix + 2; x++, j++) { int xInRange = std::max(0, std::min(x, IW - 1)); - retX += xFactor[ox * CUBIC_GRID_LEN + j] * getValue(in_ptr_nch, xInRange * srcDataSize, inputPrec); + retX += xFactor[ox * CUBIC_GRID_LEN + j] * in_ptr_nch[xInRange]; } retY += yFactor[oy * CUBIC_GRID_LEN + i] * retX; } - - 
setValue(out_ptr_nc, (oy * OW + ox) * dstDataSize, retY, outputPrec); + out_ptr_nc[oy * OW + ox] = retY; }); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h index 02146a28bdd814..24b00e2cc6695b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h @@ -10,7 +10,7 @@ #include #include -#define MAX_INPUT_INTERPOLATE 4 +#define MAX_INPUT_INTERPOLATE 8 using namespace InferenceEngine; @@ -52,7 +52,8 @@ struct jit_interpolate_config_params { int src_data_size; int dst_data_size; int indices_size; - int IH, IW, OH, OW; + int spatial_dim_size; + int ID, IH, IW, OD, OH, OW; }; struct jit_interpolate_call_args { @@ -104,9 +105,10 @@ class MKLDNNInterpolateNode : public MKLDNNNode { void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); // onnx linear - void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); - void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); - void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); + void linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1); + void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); // cubic std::vector getCubicCoeffs(float mantissa, float a); @@ -158,6 +160,7 @@ class MKLDNNInterpolateNode : public MKLDNNNode { std::string shapeInferMode; SizeVector srcDim; SizeVector srcDimPad; + int spatialDimSize; mkldnn::primitive_attr attr; std::vector PostOpsIntBlobMemory; @@ -169,7 +172,7 @@ class MKLDNNInterpolateNode : public MKLDNNNode { std::vector indexTable; - std::shared_ptr interpolateKernel; + std::shared_ptr interpolateKernel = nullptr; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index e036369bbf38a7..dd4179b2d22c09 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -16,6 +16,7 @@ #include #include "ie_parallel.hpp" #include +#include "common/jit_load_store_emitters.h" #include #include @@ -23,6 +24,8 @@ #include #include +#include + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -56,126 +59,158 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k } void generate() override { + load_emitter.reset(new jit_load_emitter(this, isa, nullptr)); + this->preamble(); mov(reg_src, ptr[reg_params + GET_OFF(src)]); if (jcp_.normalize_variance) { mov(reg_mean, ptr[reg_params + GET_OFF(mean)]); mov(reg_variance, ptr[reg_params + GET_OFF(variance)]); + uni_vpxor(vmm_variance, vmm_variance, vmm_variance); } 
else { mov(reg_sum, ptr[reg_params + GET_OFF(sum)]); + uni_vpxor(vmm_sum, vmm_sum, vmm_sum); } mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); mov(reg_stride, ptr[reg_params + GET_OFF(src_stride)]); + mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]); - int repeats = (!jcp_.planar_layout && !jcp_.across_channels && isa == cpu::x64::sse41) ? 2 : 1; // block size is also 8 on cpu::x64::sse41 - for (int i = 0; i < repeats; i++) { - int offset_sse42 = i * 4; - if (i > 0) { - mov(reg_src, ptr[reg_params + GET_OFF(src)]); - mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); - - add(reg_src, offset_sse42 * jcp_.src_data_size); - if (jcp_.normalize_variance) { - add(reg_mean, offset_sse42 * sizeof(float)); - add(reg_variance, offset_sse42 * sizeof(float)); - } else { - add(reg_sum, offset_sse42 * sizeof(float)); - } - } - - Xbyak::Label loop_label; - Xbyak::Label loop_end_label; - - if (jcp_.normalize_variance) { - uni_vpxor(vmm_variance, vmm_variance, vmm_variance); - - if (jcp_.planar_layout || jcp_.across_channels) { - uni_vbroadcastss(vmm_mean, ptr[reg_mean]); - } else { - uni_vmovups(vmm_mean, ptr[reg_mean]); - } + if (jcp_.normalize_variance) { + if (jcp_.planar_layout || jcp_.across_channels) { + uni_vbroadcastss(vmm_mean, ptr[reg_mean]); } else { - uni_vpxor(vmm_sum, vmm_sum, vmm_sum); + uni_vmovups(vmm_mean, ptr[reg_mean]); } + } - L(loop_label); - { - cmp(reg_work_amount, 0); - jle(loop_end_label, T_NEAR); + tail_num = jcp_.planar_layout ? (jcp_.D * jcp_.H * jcp_.W) - ((jcp_.D * jcp_.H * jcp_.W) / step) * step : + jcp_.C - (jcp_.C / step) * step; - load_vector(vmm_val, ptr[reg_src], jcp_.src_dt); + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; - if (jcp_.normalize_variance) { - if (!isFloatCompatible(jcp_.src_dt)) - uni_vcvtdq2ps(vmm_val, vmm_val); + if (jcp_.planar_layout) { + worker_unroll(); + if (tail_num != 0) { + worker_tail_planar(); + } - uni_vsubps(vmm_val, vmm_val, vmm_mean); - uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); - } else { - if (!isFloatCompatible(jcp_.src_dt)) - uni_vpaddd(vmm_sum, vmm_sum, vmm_val); - else - uni_vaddps(vmm_sum, vmm_sum, vmm_val); + // hsum+store + if (!jcp_.normalize_variance && !isFloatCompatible(jcp_.src_prc)) + uni_vcvtdq2ps(vmm_sum, vmm_sum); + Vmm vmm_dst = jcp_.normalize_variance ? vmm_variance : vmm_sum; + if (isa == cpu::x64::sse41) { + hsum_store(vmm_dst); + } else if (isa == cpu::x64::avx2) { + Xbyak::Ymm ymm_sum = Xbyak::Ymm(vmm_dst.getIdx()); + vextractf128(xmm_aux1, ymm_sum, 0); + vextractf128(xmm_aux2, ymm_sum, 1); + addps(xmm_aux1, xmm_aux2); + hsum_store(xmm_aux1); + } else { + Xbyak::Zmm zmm_sum = Xbyak::Zmm(vmm_dst.getIdx()); + vextractf32x4(xmm_aux1, zmm_sum, 0); + vextractf32x4(xmm_aux2, zmm_sum, 1); + addps(xmm_aux1, xmm_aux2); + vextractf32x4(xmm_aux2, zmm_sum, 2); + vextractf32x4(xmm_aux3, zmm_sum, 3); + addps(xmm_aux2, xmm_aux3); + addps(xmm_aux1, xmm_aux2); + hsum_store(xmm_aux1); + } + } else { + // blk+nspc + int repeats = (isa == cpu::x64::sse41) ? 
2 : 1; // block size is also 8 on cpu::x64::sse41 with two step process + int sse42_step = 4; + for (int i = 0; i < repeats; i++) { + int offset_sse42 = i * sse42_step; + if (i > 0) { + mov(reg_src, ptr[reg_params + GET_OFF(src)]); + mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); + + add(reg_src, offset_sse42 * jcp_.src_data_size); + + if (jcp_.normalize_variance) { + // mean and vaiance for variance kernel + if (!jcp_.across_channels) { + // mean is bc when across_channel, no need shift + add(reg_mean, offset_sse42 * sizeof(float)); + uni_vmovups(vmm_mean, ptr[reg_mean]); + } + add(reg_variance, offset_sse42 * sizeof(float)); + uni_vpxor(vmm_variance, vmm_variance, vmm_variance); + } else { + // sum for mean kernel + add(reg_sum, offset_sse42 * sizeof(float)); + uni_vpxor(vmm_sum, vmm_sum, vmm_sum); + } + add(reg_oc_off, offset_sse42 * sizeof(float)); } - add(reg_src, reg_stride); - sub(reg_work_amount, 1); + Xbyak::Label label_empty_2half_sse42; + if (tail_num == 0) { + cmp(reg_oc_off, static_cast(jcp_.C * sizeof(float))); + jae(label_empty_2half_sse42, T_NEAR); - jmp(loop_label, T_NEAR); - } - L(loop_end_label); - - if (jcp_.planar_layout) { - Vmm vmm_dst = jcp_.normalize_variance ? vmm_variance : vmm_sum; - // hsum+store - if (isa == cpu::x64::sse41) { - hsum_store(vmm_dst); - } else if (isa == cpu::x64::avx2) { - Xbyak::Ymm ymm_sum = Xbyak::Ymm(vmm_dst.getIdx()); - vextractf128(xmm_aux1, ymm_sum, 0); - vextractf128(xmm_aux2, ymm_sum, 1); - addps(xmm_aux1, xmm_aux2); - hsum_store(xmm_aux1); + worker_unroll(); } else { - Xbyak::Zmm zmm_sum = Xbyak::Zmm(vmm_dst.getIdx()); - vextractf32x4(xmm_aux1, zmm_sum, 0); - vextractf32x4(xmm_aux2, zmm_sum, 1); - addps(xmm_aux1, xmm_aux2); - vextractf32x4(xmm_aux2, zmm_sum, 2); - vextractf32x4(xmm_aux3, zmm_sum, 3); - addps(xmm_aux2, xmm_aux3); - addps(xmm_aux1, xmm_aux2); - hsum_store(xmm_aux1); + // maybe tail blk + cmp(reg_oc_off, static_cast(jcp_.C * sizeof(float))); + jae(label_empty_2half_sse42, T_NEAR); + + Xbyak::Label label_full_size; + Xbyak::Label label_size_end; + cmp(reg_oc_off, static_cast((jcp_.C - step) * sizeof(float))); + jle(label_full_size, T_NEAR); + + // no need care and fill rest + // for per_channel, do not use tail mean(variance), do not store computed tail values. + // for across_channel, partial sum for tail one time out of kernel from perf. 
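
Editor's note: the tail bookkeeping used above (step = vlen / sizeof(float), tail_num = C - (C / step) * step, plus the SSE4.1 two-half pass over 8-channel blocks) is easier to see with concrete numbers. The following is a purely illustrative scalar sketch, independent of the plugin code; the function and struct names are invented for the example.

#include <cstdio>

struct ChannelSplit {
    int fullSteps;   // complete vector iterations over the channel dimension
    int tailLanes;   // leftover channels handled by the masked / tail path
};

static ChannelSplit splitChannels(int C, int vlenBytes) {
    int step = vlenBytes / static_cast<int>(sizeof(float));
    return {C / step, C - (C / step) * step};
}

int main() {
    const int C = 13;                 // example channel count
    for (int vlen : {16, 32, 64}) {   // SSE4.1, AVX2, AVX-512 register widths in bytes
        ChannelSplit s = splitChannels(C, vlen);
        std::printf("vlen=%2d bytes: %d full step(s), tail of %d channel(s)\n",
                    vlen, s.fullSteps, s.tailLanes);
    }
    // Note: with SSE4.1 a blocked layout still uses 8-channel blocks, so the
    // kernel body runs twice ("repeats = 2") to cover both 4-float halves.
    return 0;
}
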
+ worker_unroll(true); + + jmp(label_size_end, T_NEAR); + L(label_full_size); + { + worker_unroll(); + } + L(label_size_end); } - } else { + + // add input_base value and store for per_channel + // store for across_channels if (jcp_.normalize_variance) { - if (!jcp_.planar_layout && !jcp_.across_channels) { + if (!jcp_.across_channels) { uni_vmovups(vmm_val, ptr[reg_variance]); uni_vaddps(vmm_variance, vmm_variance, vmm_val); } - uni_vmovups(ptr[reg_variance], vmm_variance); } else { - if (!isFloatCompatible(jcp_.src_dt)) + if (!isFloatCompatible(jcp_.src_prc)) // add with int for int-family data type, other compute go with float uni_vcvtdq2ps(vmm_sum, vmm_sum); - if (!jcp_.planar_layout && !jcp_.across_channels) { + if (!jcp_.across_channels) { uni_vmovups(vmm_val, ptr[reg_sum]); uni_vaddps(vmm_sum, vmm_sum, vmm_val); } - uni_vmovups(ptr[reg_sum], vmm_sum); } + + L(label_empty_2half_sse42); } } this->postamble(); + + load_emitter->emit_table(); } private: using Vmm = typename conditional3::type; + const int vlen = cpu_isa_traits::vlen; + const int step = vlen / sizeof(float); + int tail_num = 0; + Xbyak::Reg64 reg_src = r8; Xbyak::Reg64 reg_mean = r9; Xbyak::Reg64 reg_variance = r10; @@ -183,6 +218,11 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k Xbyak::Reg64 reg_stride = r12; Xbyak::Reg64 reg_sum = reg_mean; Xbyak::Reg64 reg_params = abi_param1; + Xbyak::Reg64 reg_load_table = r13; + Xbyak::Reg64 reg_load_store_mask = r14; + Xbyak::Reg64 reg_aux = r15; + + Xbyak::Reg64 reg_oc_off = rax; Vmm vmm_val = Vmm(0); Vmm vmm_mean = Vmm(1); @@ -191,6 +231,117 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k Xbyak::Xmm xmm_aux1 = Xbyak::Xmm(3); Xbyak::Xmm xmm_aux2 = Xbyak::Xmm(4); Xbyak::Xmm xmm_aux3 = Xbyak::Xmm(5); + Vmm vmm_zero = Vmm(6); + + Xbyak::Opmask k_mask = Xbyak::Opmask(7); + + std::unique_ptr load_emitter = nullptr; + + std::vector load_pool_gpr_idxs; + + inline void worker_full_size() { + Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, step), + {}, {load_pool_gpr_idxs}); + + if (jcp_.normalize_variance) { + // all with float + if (!isFloatCompatible(jcp_.src_prc)) + uni_vcvtdq2ps(vmm_val, vmm_val); + + uni_vsubps(vmm_val, vmm_val, vmm_mean); + uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); + } else { + // for sum, int execute prc for int-family data type + if (!isFloatCompatible(jcp_.src_prc)) + uni_vpaddd(vmm_sum, vmm_sum, vmm_val); + else + uni_vaddps(vmm_sum, vmm_sum, vmm_val); + } + } + + inline void worker_tail_blk() { + Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? 
Precision::FP32 : Precision::I32; + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, tail_num), + {}, {load_pool_gpr_idxs}); + + if (jcp_.normalize_variance) { + // all with float + if (!isFloatCompatible(jcp_.src_prc)) + uni_vcvtdq2ps(vmm_val, vmm_val); + + uni_vsubps(vmm_val, vmm_val, vmm_mean); + uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); + } else { + // for sum, int execute prc for int-family data type + if (!isFloatCompatible(jcp_.src_prc)) + uni_vpaddd(vmm_sum, vmm_sum, vmm_val); + else + uni_vaddps(vmm_sum, vmm_sum, vmm_val); + } + } + + inline void worker_unroll(bool is_tail = false) { + Xbyak::Label loop_label; + Xbyak::Label loop_end_label; + L(loop_label); + { + cmp(reg_work_amount, 0); + jle(loop_end_label, T_NEAR); + + if (!jcp_.planar_layout && is_tail) { + worker_tail_blk(); + } else { + worker_full_size(); + } + + add(reg_src, reg_stride); + sub(reg_work_amount, 1); + + jmp(loop_label, T_NEAR); + } + L(loop_end_label); + } + + inline void worker_tail_planar() { + Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, dst_prc, tail_num, true, "zero"), + {}, {load_pool_gpr_idxs}); + + if (jcp_.normalize_variance) { + if (!isFloatCompatible(jcp_.src_prc)) + uni_vcvtdq2ps(vmm_val, vmm_val); + + uni_vsubps(vmm_val, vmm_val, vmm_mean); + + uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + if (isa == cpu::x64::sse41) { + uint8 imm = 1; + imm = ~((imm << tail_num) - imm); + blendps(vmm_val, vmm_zero, imm); + } else if (isa == cpu::x64::avx2) { + uint8 imm = 1; + imm = ~((imm << tail_num) - imm); + vblendps(vmm_val, vmm_val, vmm_zero, imm); + } else if (isa == cpu::x64::avx512_common) { + uint64_t tail_mask = 1; + tail_mask = ~((tail_mask << tail_num) - tail_mask); + mov(reg_aux, tail_mask); + kmovq(k_mask, reg_aux); + vblendmps(vmm_val | k_mask, vmm_val, vmm_zero); + } + + uni_vfmadd231ps(vmm_variance, vmm_val, vmm_val); + } else { + if (!isFloatCompatible(jcp_.src_prc)) + uni_vpaddd(vmm_sum, vmm_sum, vmm_val); + else + uni_vaddps(vmm_sum, vmm_sum, vmm_val); + } + } inline void hsum_store(Xbyak::Xmm xmm_sum) { movshdup(xmm_aux3, xmm_sum); // sum:1,2,3,4; aux3:2,2,4,4 @@ -203,27 +354,6 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k movss(ptr[reg_sum], xmm_sum); } } - - inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, memory::data_type src_dt) { - switch (src_dt) { - case memory::data_type::f32: - case memory::data_type::s32: - uni_vmovups(vmm_src, op); - break; - case memory::data_type::s8: - uni_vpmovsxbd(vmm_src, op); - break; - case memory::data_type::u8: - uni_vpmovzxbd(vmm_src, op); - break; - case memory::data_type::bf16: - uni_vpmovzxwd(vmm_src, op); - uni_vpslld(vmm_src, vmm_src, 16); - break; - default: - assert(!"unknown dst_dt"); - } - } }; // mean,variance->mvn @@ -254,82 +384,103 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator } } - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) - emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr)); + load_emitter.reset(new jit_load_emitter(this, isa, nullptr)); + store_emitter.reset(new jit_store_emitter(this, isa, nullptr)); this->preamble(); mov(reg_src, ptr[reg_params + GET_OFF(src)]); mov(reg_mean, ptr[reg_params + GET_OFF(mean)]); - mov(reg_variance_inv, ptr[reg_params + GET_OFF(variance)]); + 
if (jcp_.normalize_variance) + mov(reg_variance_inv, ptr[reg_params + GET_OFF(variance)]); mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); mov(reg_src_stride, ptr[reg_params + GET_OFF(src_stride)]); mov(reg_dst_stride, ptr[reg_params + GET_OFF(dst_stride)]); - if (attr_.post_ops_.len() != 0) - mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]); + mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]); - if (isa == avx512_common) - uni_vpxor(vmm_zero, vmm_zero, vmm_zero); - - int repeats = (!jcp_.planar_layout && !jcp_.across_channels && isa == cpu::x64::sse41) ? 2 : 1; // block size is also 8 on cpu::x64::sse41 - for (int i = 0; i < repeats; i++) { - int offset_sse42 = i * 4; - if (i > 0) { - mov(reg_src, ptr[reg_params + GET_OFF(src)]); - mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); - mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); - - add(reg_src, offset_sse42 * jcp_.src_data_size); - add(reg_dst, offset_sse42 * jcp_.dst_data_size); - add(reg_mean, offset_sse42 * sizeof(float)); - add(reg_variance_inv, offset_sse42 * sizeof(float)); - if (attr_.post_ops_.len() != 0) - add(reg_oc_off, offset_sse42 * sizeof(float)); - } + if (jcp_.planar_layout || jcp_.across_channels) { + uni_vbroadcastss(vmm_mean, ptr[reg_mean]); + if (jcp_.normalize_variance) + uni_vbroadcastss(vmm_variance_inv, ptr[reg_variance_inv]); + } else { + uni_vmovups(vmm_mean, ptr[reg_mean]); + if (jcp_.normalize_variance) + uni_vmovups(vmm_variance_inv, ptr[reg_variance_inv]); + } - if (jcp_.planar_layout || jcp_.across_channels) { - uni_vbroadcastss(vmm_mean, ptr[reg_mean]); - if (jcp_.normalize_variance) - uni_vbroadcastss(vmm_variance_inv, ptr[reg_variance_inv]); + uni_vpxor(vmm_zero, vmm_zero, vmm_zero); - } else { - uni_vmovups(vmm_mean, ptr[reg_mean]); - if (jcp_.normalize_variance) - uni_vmovups(vmm_variance_inv, ptr[reg_variance_inv]); - } + tail_num = jcp_.planar_layout ? (jcp_.D * jcp_.H * jcp_.W) - ((jcp_.D * jcp_.H * jcp_.W) / step) * step : + jcp_.C - (jcp_.C / step) * step; - Xbyak::Label mvn_loop_label; - Xbyak::Label mvn_loop_end_label; + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; + store_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx())}; + store_pool_vec_idxs = {static_cast(vmm_zero.getIdx())}; - L(mvn_loop_label); - { - cmp(reg_work_amount, 0); - jle(mvn_loop_end_label, T_NEAR); + if (jcp_.planar_layout) { + worker_mvn_unroll(); + if (tail_num != 0) { + worker_mvn(true); + } + } else { + // blk+nspc + int repeats = (isa == cpu::x64::sse41) ? 
2 : 1; // block size is also 8 on cpu::x64::sse41 + for (int i = 0; i < repeats; i++) { + int offset_sse42 = i * 4; + if (i > 0) { + // reset modified input + mov(reg_src, ptr[reg_params + GET_OFF(src)]); + mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); + mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); + + add(reg_src, offset_sse42 * jcp_.src_data_size); + add(reg_dst, offset_sse42 * jcp_.dst_data_size); + add(reg_oc_off, offset_sse42 * sizeof(float)); - load_vector(vmm_val, ptr[reg_src], jcp_.src_dt); + if (!jcp_.across_channels) { + add(reg_mean, offset_sse42 * sizeof(float)); + uni_vmovups(vmm_mean, ptr[reg_mean]); + if (jcp_.normalize_variance) { + add(reg_variance_inv, offset_sse42 * sizeof(float)); + uni_vmovups(vmm_variance_inv, ptr[reg_variance_inv]); + } + } + } - uni_vsubps(vmm_val, vmm_val, vmm_mean); - if (jcp_.normalize_variance) - uni_vmulps(vmm_val, vmm_val, vmm_variance_inv); + Xbyak::Label label_empty_2half_sse42; + if (tail_num == 0) { + cmp(reg_oc_off, static_cast(jcp_.C * sizeof(float))); + jae(label_empty_2half_sse42, T_NEAR); + worker_mvn_unroll(); + } else { + cmp(reg_oc_off, static_cast(jcp_.C * sizeof(float))); + jae(label_empty_2half_sse42, T_NEAR); - apply_post_ops(jcp_.dst_dt); + Xbyak::Label label_full_size_block; + Xbyak::Label label_size_end; - store_vector(ptr[reg_dst], vmm_val, jcp_.dst_dt); + cmp(reg_oc_off, static_cast((jcp_.C - step) * sizeof(float))); + jle(label_full_size_block, T_NEAR); - add(reg_src, reg_src_stride); - add(reg_dst, reg_dst_stride); - sub(reg_work_amount, 1); + worker_mvn_unroll(true); + jmp(label_size_end, T_NEAR); - jmp(mvn_loop_label, T_NEAR); + L(label_full_size_block); + { + worker_mvn_unroll(); + } + L(label_size_end); + } + L(label_empty_2half_sse42); } - L(mvn_loop_end_label); } this->postamble(); - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) - emu_vcvtneps2bf16->emit_table(); + load_emitter->emit_table(); + if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) + store_emitter->get_emu_vcvtneps2bf16()->emit_table(); for (auto& inj : eltwise_injectors) inj->prepare_table(); @@ -340,6 +491,8 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator Xbyak::Ymm, Xbyak::Zmm>::type; const int vlen = cpu_isa_traits::vlen; + const int step = vlen / sizeof(float); + int tail_num = 0; Xbyak::Reg64 reg_src = r8; Xbyak::Reg64 reg_mean = r9; @@ -354,6 +507,9 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator Xbyak::Reg64 reg_d_weights = rbx; Xbyak::Reg64 reg_d_bias = rdx; + Xbyak::Reg64 reg_load_table = r15; + Xbyak::Reg64 reg_load_store_mask = rcx; + Vmm vmm_val = Vmm(0); Vmm vmm_mean = Vmm(1); Vmm vmm_variance_inv = Vmm(2); @@ -362,83 +518,55 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator Vmm vmm_d_weights = Vmm(5); Vmm vmm_d_bias = Vmm(6); - std::unique_ptr emu_vcvtneps2bf16; - - Xbyak::Label l_table; + std::unique_ptr load_emitter = nullptr; + std::unique_ptr store_emitter = nullptr; std::vector>> eltwise_injectors; std::vector>> depthwise_injectors; std::vector>> quantization_injectors; - inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, memory::data_type src_dt) { - switch (src_dt) { - case memory::data_type::f32: - case memory::data_type::s32: - uni_vmovups(vmm_src, op); - break; - case memory::data_type::s8: - uni_vpmovsxbd(vmm_src, op); - break; - case memory::data_type::u8: - uni_vpmovzxbd(vmm_src, op); - break; - case 
memory::data_type::bf16: - uni_vpmovzxwd(vmm_src, op); - uni_vpslld(vmm_src, vmm_src, 16); - break; - default: - assert(!"unknown dst_dt"); - } + std::vector store_pool_gpr_idxs; + std::vector store_pool_vec_idxs; + std::vector load_pool_gpr_idxs; + + inline void worker_mvn(bool is_tail) { + int elt_num = is_tail ? tail_num : step; + load_emitter->emit({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, + std::make_shared(jcp_.src_prc, Precision::FP32, elt_num), + {}, {load_pool_gpr_idxs}); + + uni_vsubps(vmm_val, vmm_val, vmm_mean); + if (jcp_.normalize_variance) + uni_vmulps(vmm_val, vmm_val, vmm_variance_inv); + + apply_post_ops(jcp_.dst_prc, jcp_.planar_layout); - if (!isFloatCompatible(src_dt)) - uni_vcvtdq2ps(vmm_src, vmm_src); + store_emitter->emit({static_cast(vmm_val.getIdx())}, {static_cast(reg_dst.getIdx())}, + std::make_shared(Precision::FP32, jcp_.dst_prc, elt_num), + {store_pool_vec_idxs}, {store_pool_gpr_idxs}); } - inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, memory::data_type dst_dt) { - Ymm ymm_dst = Ymm(vmm_dst.getIdx()); - Xmm xmm_dst = Xmm(vmm_dst.getIdx()); + inline void worker_mvn_unroll(bool is_tail = false) { + Xbyak::Label mvn_loop_label; + Xbyak::Label mvn_loop_end_label; - if (dst_dt == memory::data_type::f32) { - uni_vmovups(op, vmm_dst); - } else if (dst_dt == memory::data_type::bf16) { - if (mayiuse(avx512_core_bf16)) - vcvtneps2bf16(ymm_dst, vmm_dst); - else - emu_vcvtneps2bf16->emit({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); - } else if (dst_dt == memory::data_type::u8) { - uni_vcvtps2dq(vmm_dst, vmm_dst); - if (isa == cpu::x64::avx512_common) { - vpmaxsd(vmm_dst, vmm_dst, vmm_zero); - vpmovusdb(op, vmm_dst); - } else { - uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) - vpermq(ymm_dst, ymm_dst, 0x08); - uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) - vmovq(op, xmm_dst); - else - movd(op, xmm_dst); - } - } else if (dst_dt == memory::data_type::s8) { - uni_vcvtps2dq(vmm_dst, vmm_dst); - if (isa == cpu::x64::avx512_common) { - vpmovsdb(op, vmm_dst); - } else { - uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) - vpermq(ymm_dst, ymm_dst, 0x08); - uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != cpu::x64::sse41) - vmovq(op, xmm_dst); - else - movd(op, xmm_dst); - } + L(mvn_loop_label); + { + cmp(reg_work_amount, 0); + jle(mvn_loop_end_label, T_NEAR); + + worker_mvn(is_tail); + + add(reg_src, reg_src_stride); + add(reg_dst, reg_dst_stride); + sub(reg_work_amount, 1); + + jmp(mvn_loop_label, T_NEAR); } + L(mvn_loop_end_label); } - void apply_post_ops(memory::data_type dst_dt) { + void apply_post_ops(InferenceEngine::Precision dst_prc, bool is_broadcast) { const auto &p = attr_.post_ops_; int eltwise_inj_idx = 0; int depthwise_inj_idx = 0; @@ -453,21 +581,21 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator mov(reg_d_bias, reinterpret_cast(post_op.depthwise.biases_data)); add(reg_d_weights, reg_oc_off); add(reg_d_bias, reg_oc_off); - depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias); + depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast); depthwise_inj_idx++; } else if (post_op.is_quantization()) { bool do_dequantization = post_op.quantization.alg == alg_kind::quantization_quantize_dequantize; - bool 
do_rounding = do_dequantization || isFloatCompatible(dst_dt) || i != p.len() - 1; + bool do_rounding = do_dequantization || isFloatCompatible(dst_prc) || i != p.len() - 1; int s_idx = vmm_val.getIdx(); quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0); + quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast); quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding); + quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast); quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off); - quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0); + quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast); quantization_inj_idx++; } @@ -477,28 +605,43 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator ////////////////////////////////////////////////////////////////////////////////// MKLDNNMVNNode::MKLDNNMVNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} + : MKLDNNNode(layer, eng, cache), epsMode_(insideSqrt) {} void MKLDNNMVNNode::getSupportedDescriptors() { if (!descs.empty()) return; - const auto& numOfDims = getParentEdgeAt(0)->getDims().ndims(); - if (numOfDims < 1 || numOfDims > 5) - THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't support input with size of dimensions: " << numOfDims; + std::string errPrefix = "MVN node with name '" + getName() + "' "; + + auto cnnLayer = getCnnLayer(); + if (cnnLayer == nullptr) + THROW_IE_EXCEPTION << errPrefix << "does not have CNN layer."; - auto * mvnLayer = dynamic_cast(getCnnLayer().get()); - if (mvnLayer == nullptr) - THROW_IE_EXCEPTION << "Cannot convert MVN layer."; + if (getParentEdges().size() > 2) + THROW_IE_EXCEPTION << errPrefix << "has incorrect number of input edges."; - if (getParentEdges().size() != 1) - THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) - THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName(); + THROW_IE_EXCEPTION << errPrefix << "has incorrect number of output edges."; + + const auto& numOfDims = getParentEdgeAt(0)->getDims().ndims(); + if (numOfDims < 1 || numOfDims > 5) + THROW_IE_EXCEPTION << errPrefix << "doesn't support input with size of dimensions: " << numOfDims; - across_channels = mvnLayer->across_channels; - normalize_variance = mvnLayer->normalize; - eps = mvnLayer->GetParamAsFloat("eps"); + across_channels = false; + if (getParentEdges().size() == 1) { + across_channels = cnnLayer->GetParamAsBool("across_channels"); + } else { + if (numOfDims == getParentEdgeAt(1)->getDims().size() + 1 || numOfDims == 1) + across_channels = true; + } + normalize_variance = cnnLayer->GetParamAsBool("normalize_variance", true); + eps = cnnLayer->GetParamAsFloat("eps"); + auto epsMode = cnnLayer->GetParamAsString("eps_mode", ""); + if (details::CaselessEq()(epsMode, "inside_sqrt")) { + epsMode_ = insideSqrt; + } else if (details::CaselessEq()(epsMode, "outside_sqrt")) { + epsMode_ = outsideSqrt; + } } void 
MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { @@ -508,7 +651,17 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); + if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5 + || across_channels != 0 || normalize_variance != 1) { + if (!isFloatCompatible(inputPrecision)) { + inputPrecision = Precision::FP32; + } + } Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + if (!mayiuse(avx512_core)) { + if (outputPrecision == Precision::BF16) + outputPrecision = Precision::FP32; + } if (!fusedWith.empty()) { auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); @@ -517,19 +670,9 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { } } - if (getParentEdgeAt(0)->getDims().ndims() < 4 || getParentEdgeAt(0)->getDims().ndims() > 5 - || across_channels != 0 || normalize_variance != 1) { - if (!isFloatCompatible(inputPrecision)) { - inputPrecision = Precision::FP32; - } - if (!isFloatCompatible(outputPrecision)) { - outputPrecision = Precision::FP32; - } - } - - if (!mayiuse(avx512_core)) { - if (outputPrecision == Precision::BF16) - outputPrecision = Precision::FP32; + // ref with float planar and no fusion + if (!mayiuse(cpu::x64::sse41)) { + inputPrecision = outputPrecision = Precision::FP32; } auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); @@ -540,16 +683,26 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1; + bool canBeInplace = (src_data_size == dst_data_size) && + (getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) && + !getParentEdgeAt(0)->getParent()->isConstant(); + const size_t inputsNum = getCnnLayer()->insData.size(); InferenceEngine::LayerConfig config; config.dynBatchSupport = false; - config.inConfs.resize(1); + config.inConfs.resize(inputsNum); config.outConfs.resize(1); config.inConfs[0].constant = false; config.outConfs[0].constant = false; config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 
0 : -1; + if (inputsNum == 2) { + const auto& dims = getCnnLayer()->insData[1].lock()->getTensorDesc().getDims(); + config.inConfs[1].desc = TensorDesc(Precision::I32, + dims, + TensorDesc::getLayoutByDims(dims)); + config.inConfs[1].constant = true; + } auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format); @@ -568,15 +721,14 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - if (across_channels == 0 && normalize_variance == 1) { + if (mayiuse(cpu::x64::sse41)) { + // nspc if (getParentEdgeAt(0)->getDims().ndims() == 4) { pushDesc(memory::format_tag::nhwc, impl_type); } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { pushDesc(memory::format_tag::ndhwc, impl_type); } - } - - if (isFloatCompatible(inputPrecision) && isFloatCompatible(outputPrecision)) { + // blk if (impl_desc_type::jit_avx512 == impl_type) { if (getParentEdgeAt(0)->getDims().ndims() == 4) { pushDesc(memory::format_tag::nChw16c, impl_type); @@ -590,13 +742,25 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { pushDesc(memory::format_tag::nCdhw8c, impl_type); } } + } - if (fusedWith.empty()) { - if (canBeInplace) - config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()), impl_type); - } + // planar + if (canBeInplace) + config.inConfs[0].inPlace = 0; + pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()), impl_type); +} + +std::tuple MKLDNNMVNNode::get5dShapes(const SizeVector& dims) { + std::tuple shapes; + switch (dims.size()) { + case 1 : { shapes = std::make_tuple(1, dims[0], 1, 1, 1); break; } + case 2 : { shapes = std::make_tuple(dims[0], dims[1], 1, 1, 1); break; } + case 3 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], 1); break; } + case 4 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], dims[3]); break; } + case 5 : { shapes = std::make_tuple(dims[0], dims[1], dims[2], dims[3], dims[4]); break; } + default : { THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't support planar layout with rank: " << dims.size(); } } + return shapes; } void MKLDNNMVNNode::createPrimitive() { @@ -611,13 +775,16 @@ void MKLDNNMVNNode::createPrimitive() { auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_mvn_config_params(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); - jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); - jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); + jcp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); + jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); + jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); jcp.normalize_variance = normalize_variance; jcp.across_channels = across_channels; + SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + int N = 0; + std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = get5dShapes(in_dims); if 
(mayiuse(cpu::x64::avx512_common)) { mvn_kernel.reset(new jit_uni_mvn_kernel_f32(jcp, *attr.get())); @@ -647,6 +814,7 @@ void MKLDNNMVNNode::createPrimitive() { mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } } + if (mvn_kernel) mvn_kernel->create_ker(); @@ -659,7 +827,6 @@ void MKLDNNMVNNode::createPrimitive() { void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { mkldnn::post_ops ops; - for (auto &node : fusedWith) { auto* quantizeNode = dynamic_cast(node.get()); if (quantizeNode) { @@ -672,10 +839,8 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { eltwiseNode->appendPostOps(ops); continue; } - THROW_IE_EXCEPTION << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; } - attr.set_post_ops(ops); } @@ -683,124 +848,26 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { - if (input_prec == Precision::FP32) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - if (output_prec == Precision::FP32) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - mvn_pln(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (output_prec == Precision::BF16) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - mvn_pln(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported output precision: " << output_prec.name(); - } - } else if (input_prec == Precision::BF16) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - if (output_prec == Precision::FP32) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - mvn_pln(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (output_prec == Precision::BF16) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - mvn_pln(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported output precision: " << output_prec.name(); - } - } else { - THROW_IE_EXCEPTION << "Unsupported input precision: " << input_prec.name(); + auto dim = getParentEdgeAt(0)->getDesc().getDims(); + if (mayiuse(cpu::x64::sse41)) { + if (!mvn_mean_kernel || (normalize_variance && !mvn_variance_kernel) || !mvn_kernel) { + THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't create kernel to execute on sse41 above platform."; } - } else { - if (output_prec == Precision::U8) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - if (input_prec == Precision::U8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::I8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::FP32) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::BF16) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - 
mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported input precision: " << input_prec.name(); - } - } else if (output_prec == Precision::I8) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - if (input_prec == Precision::U8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::I8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::FP32) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::BF16) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported input precision: " << input_prec.name(); - } - } else if (output_prec == Precision::FP32) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - if (input_prec == Precision::U8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::I8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::FP32) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::BF16) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported input precision: " << input_prec.name(); - } - } else if (output_prec == Precision::BF16) { - auto dst_data = reinterpret_cast(dstMemPtr->GetData()); - if (input_prec == Precision::U8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::I8) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::FP32) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else if (input_prec == Precision::BF16) { - auto src_data = reinterpret_cast(srcMemPtr->GetData()); - mvn_blk(src_data, dst_data, getParentEdgeAt(0)->getDesc().getDims()); - } else { - THROW_IE_EXCEPTION << "Unsupported input precision: " << input_prec.name(); - } + Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); + if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { + mvn_pln(src_data, dst_data, dim); } else { - THROW_IE_EXCEPTION << "Unsupported output precision: " << output_prec.name(); + mvn_blk(src_data, dst_data, dim); } + } else { + mvn_ref(src_data, dst_data, dim); } } -std::tuple MKLDNNMVNNode::get5dShapes(const SizeVector& dims) { - std::tuple shapes; - switch (dims.size()) { - case 1 : { shapes = std::make_tuple(1, dims[0], 1, 1, 1); break; } - case 2 : { shapes = std::make_tuple(dims[0], dims[1], 1, 1, 1); break; } - case 3 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], 1); break; } - case 4 : { 
shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], dims[3]); break; } - case 5 : { shapes = std::make_tuple(dims[0], dims[1], dims[2], dims[3], dims[4]); break; } - default : { THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't support planar layout with rank: " << dims.size(); } - } - return shapes; -} - -template -void MKLDNNMVNNode::mvn_pln(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { +void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) { size_t blk_size = 1; // blk size in vmm if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -817,6 +884,9 @@ void MKLDNNMVNNode::mvn_pln(const in_data_t* src_data, out_data_t* dst_data, con size_t C2 = C1 * D; size_t C3 = C2 * C; + size_t src_stride_size = static_cast(blk_size * src_data_size); + size_t dst_stride_size = static_cast(blk_size * dst_data_size); + for (size_t b = 0lu; b < N; b++) { size_t cb = b * C3; if (across_channels) { @@ -824,231 +894,229 @@ void MKLDNNMVNNode::mvn_pln(const in_data_t* src_data, out_data_t* dst_data, con // Parallel sum for each channel float C3inv = 1.f / static_cast(C3); float mean_temp = 0.0f; - size_t tail_across_channels = (C2 / blk_size) * blk_size; - if (mvn_mean_kernel) { - mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float { - float mean_internal = 0.0f; - size_t cc = cb + c * C2; - auto arg = jit_mvn_call_args(); - arg.src = src_data + cc; - arg.sum = static_cast(&mean_internal); - arg.src_stride = static_cast(blk_size * sizeof(in_data_t)); - arg.work_amount = static_cast(C2 / blk_size); - (*mvn_mean_kernel)(&arg); - for (size_t tail = tail_across_channels; tail < C2; tail++) { - mean_internal += src_data[cc + tail]; - } - return mean_internal; - }); - } else { - mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float { - float mean_internal = 0.0f; - size_t cc = cb + c * C2; - for (size_t tail = 0lu; tail < C2; tail++) { - mean_internal += src_data[cc + tail]; - } - return mean_internal; - }); - } + mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float { + float mean_internal = 0.0f; + size_t cc = cb + c * C2; + auto arg = jit_mvn_call_args(); + arg.src = src_data + cc * src_data_size; + arg.sum = static_cast(&mean_internal); + arg.src_stride = src_stride_size; + arg.work_amount = static_cast(C2 / blk_size); // for vector part + (*mvn_mean_kernel)(&arg); + return mean_internal; + }); + float mean = mean_temp * C3inv; // calculate variance value for one instance in batch // parallel sum for each channel if (normalize_variance) { float variance_temp = 0.0f; - if (mvn_variance_kernel) { - variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { - float variance_internal = 0.0f; - size_t cc = cb + c * C2; - auto arg = jit_mvn_call_args(); - arg.src = src_data + cc; - arg.mean = static_cast(&mean); - arg.variance = static_cast(&variance_internal); - arg.src_stride = static_cast(blk_size * sizeof(in_data_t)); - arg.work_amount = static_cast(C2 / blk_size); - (*mvn_variance_kernel)(&arg); + variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { + float variance_internal = 0.0f; + size_t cc = cb + c * C2; + auto arg = jit_mvn_call_args(); + arg.src = src_data + cc * src_data_size; + arg.mean = static_cast(&mean); + arg.variance = static_cast(&variance_internal); + arg.src_stride = src_stride_size; + arg.work_amount = static_cast(C2 / blk_size); // vector part + (*mvn_variance_kernel)(&arg); + return variance_internal; + }); - for (size_t tail = 
tail_across_channels; tail < C2; tail++) { - variance_internal += (src_data[cc + tail] - mean) * (src_data[cc + tail] - mean); - } - return variance_internal; - }); - } else { - variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { - float variance_internal = 0.0f; - size_t cc = cb + c * C2; - for (size_t tail = 0lu; tail < C2; tail++) { - variance_internal += (src_data[cc + tail] - mean) * (src_data[cc + tail] - mean); - } - return variance_internal; - }); - } - float variance = 1.f / sqrtf(variance_temp * C3inv + eps); + float variance = 1.f; + if (epsMode_ == insideSqrt) + variance /= sqrtf(variance_temp * C3inv + eps); + else if (epsMode_ == outsideSqrt) + variance /= sqrtf(variance_temp * C3inv) + eps; // mvn for one instance in batch - if (mvn_kernel) { - parallel_for(C, [&](int c) { - size_t cc = cb + c * C2; - auto arg = jit_mvn_call_args(); - arg.src = src_data + cc; - arg.dst = dst_data + cc; - arg.mean = static_cast(&mean); - arg.variance = static_cast(&variance); - arg.src_stride = static_cast(blk_size * sizeof(in_data_t)); - arg.dst_stride = static_cast(blk_size * sizeof(out_data_t)); - arg.work_amount = static_cast(C2 / blk_size); - (*mvn_kernel)(&arg); - - for (size_t tail = tail_across_channels; tail < C2; tail++) { - dst_data[cc + tail] = (src_data[cc + tail] - mean) * variance; - } - }); - } else { - parallel_for(C, [&](int c) { - size_t cc = cb + c * C2; - for (size_t tail = 0lu; tail < C2; tail++) { - dst_data[cc + tail] = (src_data[cc + tail] - mean) * variance; - } - }); - } + parallel_for(C, [&](int c) { + size_t cc = cb + c * C2; + auto arg = jit_mvn_call_args(); + arg.src = src_data + cc * src_data_size; + arg.dst = dst_data + cc * dst_data_size; + arg.mean = static_cast(&mean); + arg.variance = static_cast(&variance); + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(C2 / blk_size); // work amount for vector part + arg.oc_off = static_cast(c * sizeof(float)); + (*mvn_kernel)(&arg); + }); } else { // mvn for one instance in batch - if (mvn_kernel) { - parallel_for(C, [&](int c) { - size_t cc = cb + c * C2; - auto arg = jit_mvn_call_args(); - arg.src = src_data + cc; - arg.dst = dst_data + cc; - arg.mean = static_cast(&mean); - arg.src_stride = static_cast(blk_size * sizeof(in_data_t)); - arg.dst_stride = static_cast(blk_size * sizeof(out_data_t)); - arg.work_amount = static_cast(C2 / blk_size); - (*mvn_kernel)(&arg); - - for (size_t tail = tail_across_channels; tail < C2; tail++) { - dst_data[cc + tail] = src_data[cc + tail] - mean; - } - }); - } else { - parallel_for(C, [&](int c) { - size_t cc = cb + c * C2; - for (size_t tail = 0lu; tail < C2; tail++) { - dst_data[cc + tail] = src_data[cc + tail] - mean; - } - }); - } - } - } else { // per channel - float C2inv = 1.f / static_cast(C2); - if (mvn_mean_kernel && mvn_variance_kernel && mvn_kernel) { - parallel_for(C, [&](size_t c) { - // mean for this channel - size_t tail_per_channel = (C2 / blk_size) * blk_size; - float mean = 0.f; + parallel_for(C, [&](int c) { size_t cc = cb + c * C2; - // the same arg for three kernels auto arg = jit_mvn_call_args(); - arg.src = src_data + cc; - arg.dst = dst_data + cc; - arg.sum = static_cast(&mean); - arg.src_stride = static_cast(blk_size * sizeof(in_data_t)); - arg.dst_stride = static_cast(blk_size * sizeof(out_data_t)); + arg.src = src_data + cc * src_data_size; + arg.dst = dst_data + cc * dst_data_size; + arg.mean = static_cast(&mean); + arg.src_stride = src_stride_size; + arg.dst_stride = 
dst_stride_size; arg.work_amount = static_cast(C2 / blk_size); - (*mvn_mean_kernel)(&arg); - - for (size_t tail = tail_per_channel; tail < C2; tail++) { - mean += src_data[cc + tail]; - } - mean *= C2inv; - + arg.oc_off = static_cast(c * sizeof(float)); + (*mvn_kernel)(&arg); + }); + } + } else { // per channel + float C2inv = 1.f / static_cast(C2); + parallel_for(C, [&](size_t c) { + // mean for this channel + float mean = 0.f; + size_t cc = cb + c * C2; + // the same arg for three kernels + auto arg = jit_mvn_call_args(); + arg.src = src_data + cc * src_data_size; + arg.dst = dst_data + cc * dst_data_size; + arg.sum = static_cast(&mean); + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(C2 / blk_size); + arg.oc_off = static_cast(c * sizeof(float)); + (*mvn_mean_kernel)(&arg); + + mean *= C2inv; + + if (normalize_variance) { // variance for this channel - if (normalize_variance) { - float variance = 0.f; - arg.mean = static_cast(&mean); - arg.variance = static_cast(&variance); - (*mvn_variance_kernel)(&arg); + float variance = 0.f; + arg.mean = static_cast(&mean); + arg.variance = static_cast(&variance); + (*mvn_variance_kernel)(&arg); - for (size_t tail = tail_per_channel; tail < C2; tail++) { - variance += (src_data[cc + tail] - mean) * (src_data[cc + tail] - mean); - } + if (epsMode_ == insideSqrt) variance = 1.f / sqrtf(variance * C2inv + eps); + else if (epsMode_ == outsideSqrt) + variance = 1.f / (sqrtf(variance * C2inv) + eps); - // mvn for this channel - (*mvn_kernel)(&arg); - for (size_t tail = tail_per_channel; tail < C2; tail++) { - dst_data[cc + tail] = (src_data[cc + tail] - mean) * variance; - } - } else { - // mvn for this channel - arg.mean = static_cast(&mean); - (*mvn_kernel)(&arg); + // mvn for this channel + (*mvn_kernel)(&arg); + } else { + // mvn for this channel + arg.mean = static_cast(&mean); + (*mvn_kernel)(&arg); + } + }); + } + } +} - for (size_t tail = tail_per_channel; tail < C2; tail++) { - dst_data[cc + tail] = src_data[cc + tail] - mean; - } +void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) { + const float *src_data_ptr = reinterpret_cast(src_data); + float *dst_data_ptr = reinterpret_cast(dst_data); + size_t N = 0; size_t C = 0; size_t D = 0; size_t H = 0; size_t W = 0; + std::tie(N, C, D, H, W) = get5dShapes(dims); + + size_t C1 = H * W; + size_t C2 = C1 * D; + size_t C3 = C2 * C; + + for (size_t b = 0lu; b < N; b++) { + size_t cb = b * C3; + if (across_channels) { + // Parallel sum for each channel for mean + float C3inv = 1.f / static_cast(C3); + float mean_temp = 0.0f; + + mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float { + float mean_internal = 0.0f; + size_t cc = cb + c * C2; + for (size_t sp = 0lu; sp < C2; sp++) { + mean_internal += src_data_ptr[cc + sp]; + } + return mean_internal; + }); + + float mean = mean_temp * C3inv; + + if (normalize_variance) { + // parallel sum for each channel for variance + float variance_temp = 0.0f; + variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { + float variance_internal = 0.0f; + size_t cc = cb + c * C2; + for (size_t sp = 0lu; sp < C2; sp++) { + variance_internal += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean); + } + return variance_internal; + }); + + float variance = 1.f; + if (epsMode_ == insideSqrt) + variance = 1.f / sqrtf(variance_temp * C3inv + eps); + else if (epsMode_ == outsideSqrt) + variance = 1.f / (sqrtf(variance_temp * C3inv) + eps); + + 
parallel_for(C, [&](int c) { + size_t cc = cb + c * C2; + for (size_t sp = 0lu; sp < C2; sp++) { + dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance; } }); } else { - parallel_for(C, [&](size_t c) { - // mean for this channel - float mean = 0.f; + parallel_for(C, [&](int c) { size_t cc = cb + c * C2; - for (size_t tail = 0lu; tail < C2; tail++) { - mean += src_data[cc + tail]; + for (size_t sp = 0lu; sp < C2; sp++) { + dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean; } - mean *= C2inv; + }); + } + } else { // per channel + float C2inv = 1.f / static_cast(C2); + parallel_for(C, [&](size_t c) { + // mean for this channel + float mean = 0.f; + size_t cc = cb + c * C2; + for (size_t sp = 0lu; sp < C2; sp++) { + mean += src_data_ptr[cc + sp]; + } + mean *= C2inv; + if (normalize_variance) { // variance for this channel - if (normalize_variance) { - float variance = 0.f; - for (size_t tail = 0lu; tail < C2; tail++) { - variance += (src_data[cc + tail] - mean) * (src_data[cc + tail] - mean); - } + float variance = 0.f; + for (size_t sp = 0lu; sp < C2; sp++) { + variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean); + } + + if (epsMode_ == insideSqrt) variance = 1.f / sqrtf(variance * C2inv + eps); + else if (epsMode_ == outsideSqrt) + variance = 1.f / (sqrtf(variance * C2inv) + eps); - // mvn for this channel - for (size_t tail = 0lu; tail < C2; tail++) { - dst_data[cc + tail] = (src_data[cc + tail] - mean) * variance; - } - } else { - // mvn for this channel - for (size_t tail = 0lu; tail < C2; tail++) { - dst_data[cc + tail] = src_data[cc + tail] - mean; - } + // mvn for this channel + for (size_t sp = 0lu; sp < C2; sp++) { + dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance; } - }); - } + } else { + // mvn for this channel + for (size_t sp = 0lu; sp < C2; sp++) { + dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean; + } + } + }); } } } -template -void MKLDNNMVNNode::mvn_blk(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { +void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) { size_t blk_size = 1; // channel blk for memory layout - size_t ele_in_vmm = 4; if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; - ele_in_vmm = 16; - } else if (mayiuse(cpu::x64::avx2)) { - blk_size = 8; - ele_in_vmm = 8; } else { blk_size = 8; - ele_in_vmm = 4; } - size_t dims_size = dims.size(); - size_t N = (dims_size > 0) ? dims[0] : 1lu; - size_t C = (dims_size > 1) ? dims[1] : 1lu; - size_t D = (dims_size > 4) ? dims[dims_size - 3] : 1lu; - size_t H = (dims_size > 3) ? dims[dims_size - 2] : 1lu; - size_t W = (dims_size > 2) ? dims[dims_size - 1] : 1lu; + size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; + std::tie(N, C, D, H, W) = get5dShapes(dims); bool is_nhwc = false; Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); if (layout == NHWC || layout == NDHWC) is_nhwc = true; - size_t CB = is_nhwc ? C / blk_size : div_up(C, blk_size); + size_t CB = div_up(C, blk_size); size_t C0 = is_nhwc ? W * C : W * blk_size; size_t C1 = C0 * H; @@ -1056,392 +1124,251 @@ void MKLDNNMVNNode::mvn_blk(const in_data_t* src_data, out_data_t* dst_data, con size_t C3 = C2 * CB; size_t C5 = C * D * H * W; - size_t threads_num = parallel_get_num_threads(); + size_t threads_num = parallel_get_num_threads(); size_t aux_buffer_size = across_channels ? 
blk_size : rnd_up(C, blk_size); std::vector mean_buffer(aux_buffer_size * threads_num); std::vector variance_buffer(aux_buffer_size * threads_num); + size_t src_stride_size = is_nhwc ? static_cast(C * src_data_size) : static_cast(blk_size * src_data_size); + size_t dst_stride_size = is_nhwc ? static_cast(C * dst_data_size) : static_cast(blk_size * dst_data_size); + for (size_t b = 0lu; b < N; b++) { - size_t ccb = is_nhwc ? b * C2 : b * C3; + size_t b_offset = is_nhwc ? b * C5 : b * C3; if (across_channels) { // mean for this instance in batch float C5inv = 1.f / static_cast(C5); float mean_temp = 0.0f; mean_temp = parallel_sum3d(CB, D, H, mean_temp, [&](size_t cb, size_t d, size_t h)->float { - size_t ccbd = ccb + cb * C2 + d * C1 + h * C0; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; float mean_internal = 0.0f; - if ((min_cb == blk_size) && mvn_mean_kernel) { - auto mean_buffer_ptr = &mean_buffer[blk_size * parallel_get_thread_num()]; - for (int i = 0; i < blk_size; i++) - mean_buffer_ptr[i] = 0.f; + ///////////////////////////////// + // W // | + // // | + // // | + //blk + + + + + + // | + + // // | + // // | + // // \|/ + ///////////////////////////////// + auto mean_buffer_ptr = &mean_buffer[blk_size * parallel_get_thread_num()]; + for (int i = 0; i < blk_size; i++) + mean_buffer_ptr[i] = 0.f; + + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.sum = mean_buffer_ptr; + arg.src_stride = src_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = static_cast(cb * blk_size * sizeof(float)); // for tail process + (*mvn_mean_kernel)(&arg); // for W * blk - auto arg = jit_mvn_call_args(); - arg.src = src_data + ccbd; - arg.sum = mean_buffer_ptr; - arg.src_stride = static_cast(ele_in_vmm * src_data_size); - arg.work_amount = static_cast((W * blk_size)/ele_in_vmm); - (*mvn_mean_kernel)(&arg); - - for (int i = 0; i < blk_size; i++) - mean_internal += mean_buffer_ptr[i]; - - // no tail here due blk/ele_in_vmm is 1 or 2. - } else { - for (size_t w = 0lu; w < W; w++) { - size_t cw = ccbd + w * blk_size; - for (size_t c = 0lu; c < min_cb; c++) { - mean_internal += src_data[cw + c]; - } - } - } + size_t min_cb = (std::min)(blk_size, C - cb * blk_size); + for (int i = 0; i < min_cb; i++) + mean_internal += mean_buffer_ptr[i]; return mean_internal; }); float mean = mean_temp * C5inv; if (normalize_variance) { - // variance for one instance in batch + // variance: sum((x-mean)*(x-mean)) for one instance in batch float variance_temp = 0.0f; variance_temp = parallel_sum3d(CB, D, H, variance_temp, [&](size_t cb, size_t d, size_t h)->float { - size_t ccbd = ccb + cb * C2 + d * C1 + h * C0; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); + size_t src_offset = is_nhwc ? 
b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; float variance_internal = 0.0f; - if ((blk_size == min_cb) && mvn_variance_kernel) { - auto variance_buffer_ptr = &variance_buffer[blk_size * parallel_get_thread_num()]; - for (int i = 0; i < blk_size; i++) - variance_buffer_ptr[i] = 0.f; + auto variance_buffer_ptr = &variance_buffer[blk_size * parallel_get_thread_num()]; + for (int i = 0; i < blk_size; i++) + variance_buffer_ptr[i] = 0.f; - auto arg = jit_mvn_call_args(); - arg.src = src_data + ccbd; - arg.mean = static_cast(&mean); - arg.variance = variance_buffer_ptr; - arg.src_stride = static_cast(ele_in_vmm * src_data_size); - arg.work_amount = static_cast((W * blk_size)/ele_in_vmm); - (*mvn_variance_kernel)(&arg); + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.mean = static_cast(&mean); + arg.variance = variance_buffer_ptr; + arg.src_stride = src_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_variance_kernel)(&arg); - for (int i = 0; i < blk_size; i++) - variance_internal += variance_buffer_ptr[i]; - } else { - for (size_t w = 0lu; w < W; w++) { - size_t cw = ccbd + w * blk_size; - for (size_t c = 0lu; c < min_cb; c++) { - variance_internal += (src_data[cw + c] - mean) * (src_data[cw + c] - mean); - } - } - } + size_t min_cb = (std::min)(blk_size, C - cb * blk_size); + for (int i = 0; i < min_cb; i++) + variance_internal += variance_buffer_ptr[i]; return variance_internal; }); - float variance = 1.f / sqrtf(variance_temp * C5inv + eps); + float variance = 1.f; + if (epsMode_ == insideSqrt) + variance /= sqrtf(variance_temp * C5inv + eps); + else if (epsMode_ == outsideSqrt) + variance /= sqrtf(variance_temp * C5inv) + eps; // mvn for one instance in batch parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) { - size_t ccbd = ccb + cb * C2 + d * C1 + h * C0; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - if ((blk_size == min_cb) && mvn_kernel) { - auto arg = jit_mvn_call_args(); - arg.src = src_data + ccbd; - arg.dst = dst_data + ccbd; - arg.mean = static_cast(&mean); - arg.variance = static_cast(&variance); - arg.src_stride = static_cast(ele_in_vmm * src_data_size); - arg.dst_stride = static_cast(ele_in_vmm * dst_data_size); - arg.work_amount = static_cast((W * blk_size)/ele_in_vmm); - (*mvn_kernel)(&arg); - } else { - for (size_t w = 0lu; w < W; w++) { - size_t cw = ccbd + w * blk_size; - for (size_t c = 0lu; c < min_cb; c++) { - size_t src_offset = cw + c; - dst_data[src_offset] = (src_data[src_offset] - mean) * variance; - } - } - } + size_t src_offset = is_nhwc ? 
b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.dst = dst_data + src_offset * dst_data_size; + arg.mean = static_cast(&mean); + arg.variance = static_cast(&variance); + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_kernel)(&arg); }); } else { // mvn for one instance in batch parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) { - size_t ccbd = ccb + cb * C2 + d * C1 + h * C0; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - if ((blk_size == min_cb) && mvn_kernel) { - auto arg = jit_mvn_call_args(); - arg.src = src_data + ccbd; - arg.dst = dst_data + ccbd; - arg.mean = static_cast(&mean); - arg.src_stride = static_cast(ele_in_vmm * src_data_size); - arg.dst_stride = static_cast(ele_in_vmm * dst_data_size); - arg.work_amount = static_cast((W * blk_size)/ele_in_vmm); - (*mvn_kernel)(&arg); - } else { - for (size_t w = 0lu; w < W; w++) { - size_t cw = ccbd + w * blk_size; - for (size_t c = 0lu; c < min_cb; c++) { - size_t src_offset = cw + c; - dst_data[src_offset] = src_data[src_offset] - mean; - } - } - } + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.dst = dst_data + src_offset * dst_data_size; + arg.mean = static_cast(&mean); + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_kernel)(&arg); }); } } else { // for per_channel - size_t tail_cb_end = div_up(static_cast(C), static_cast(blk_size)); - size_t src_stride = is_nhwc ? C : blk_size; - - size_t tail_cb_start = 0; float size_inv = 1.f / static_cast(D * H * W); - if (mvn_mean_kernel) { - tail_cb_start = CB; + for (int i = 0; i < mean_buffer.size(); i++) + mean_buffer[i] = 0.f; - for (int i = 0; i < mean_buffer.size(); i++) - mean_buffer[i] = 0.f; + // one thread for one C*W size(the same H) to get C size result for the same H, added to last group result + // keep the compute order the same as planar + parallel_for2d(D, H, [&](size_t thr_idx, size_t d, size_t h) { + for (size_t cb = 0; cb < CB; cb++) { + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto mean_buffer_ptr = &mean_buffer[blk_size * cb + aux_buffer_size * thr_idx]; + + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.sum = mean_buffer_ptr; + arg.src_stride = src_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_mean_kernel)(&arg); + } + }); + + for (size_t i = 1; i < threads_num; i++) { + for (size_t c = 0; c < C; c++) + mean_buffer[c] += mean_buffer[c + aux_buffer_size * i]; + } + for (size_t c = 0; c < C; c++) + mean_buffer[c] *= size_inv; + + if (normalize_variance) { + for (int i = 0; i < variance_buffer.size(); i++) + variance_buffer[i] = 0.f; parallel_for2d(D, H, [&](size_t thr_idx, size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { - size_t src_off = is_nhwc ? 
ccb + d * H * W * C + h * W * C + cb * blk_size - : ccb + d * H * W * blk_size + h * W * blk_size + cb * D * H * W * blk_size; - auto mean_buffer_ptr = &mean_buffer[blk_size * cb + aux_buffer_size * thr_idx]; + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; + auto variance_buffer_ptr = &variance_buffer[blk_size * cb + aux_buffer_size * thr_idx]; auto arg = jit_mvn_call_args(); - arg.src = src_data + src_off; - arg.sum = mean_buffer_ptr; - arg.src_stride = src_stride * src_data_size; + arg.src = src_data + src_offset * src_data_size; + arg.mean = mean_buffer_ptr; + arg.variance = variance_buffer_ptr; + arg.src_stride = src_stride_size; arg.work_amount = static_cast(W); - (*mvn_mean_kernel)(&arg); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_variance_kernel)(&arg); } }); - for (size_t i = 1; i < threads_num; i++) { for (size_t c = 0; c < C; c++) - mean_buffer[c] += mean_buffer[c + aux_buffer_size * i]; + variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; } - for (size_t c = 0; c < C; c++) - mean_buffer[c] *= size_inv; - } - - for (size_t cb = tail_cb_start; cb < tail_cb_end; cb++) { - size_t src_off = is_nhwc ? ccb + cb * blk_size : ccb + cb * C2; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - - for (size_t c = 0lu; c < min_cb; c++) { - size_t cc = src_off + c; - - mean_buffer_ptr[c] = 0.0f; - for (size_t d = 0; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0; h < H; h++) { - size_t ch = cd + h * C0; - for (size_t w = 0; w < W; w++) { - mean_buffer_ptr[c] += src_data[ch + w * src_stride]; - } - } - } - mean_buffer_ptr[c] *= size_inv; - } - } - - if (normalize_variance) { - tail_cb_start = 0; - if (mvn_variance_kernel) { - tail_cb_start = CB; - - for (int i = 0; i < variance_buffer.size(); i++) - variance_buffer[i] = 0.f; - - parallel_for2d(D, H, [&](size_t thr_idx, size_t d, size_t h) { - for (size_t cb = 0; cb < CB; cb++) { - size_t src_off = is_nhwc ? ccb + d * H * W * C + h * W * C + cb * blk_size - : ccb + d * H * W * blk_size + h * W * blk_size + cb * D * H * W * blk_size; - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - auto variance_buffer_ptr = &variance_buffer[blk_size * cb + aux_buffer_size * thr_idx]; - - auto arg = jit_mvn_call_args(); - arg.src = src_data + src_off; - arg.mean = mean_buffer_ptr; - arg.variance = variance_buffer_ptr; - arg.src_stride = src_stride * src_data_size; - arg.work_amount = static_cast(W); - (*mvn_variance_kernel)(&arg); - } - }); - - for (size_t i = 1; i < threads_num; i++) { - for (size_t c = 0; c < C; c++) - variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; - } - for (size_t c = 0; c < C; c++) + for (size_t c = 0; c < C; c++) { + if (epsMode_ == insideSqrt) variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + eps); + else if (epsMode_ == outsideSqrt) + variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + eps); } - for (size_t cb = tail_cb_start; cb < tail_cb_end; cb++) { - size_t src_off = is_nhwc ? 
ccb + cb * blk_size : ccb + cb * C2; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - auto variance_buffer_ptr = &variance_buffer[blk_size * cb]; - - for (size_t c = 0lu; c < min_cb; c++) { - size_t cc = src_off + c; - - variance_buffer_ptr[c] = 0.0f; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * C0; - for (size_t w = 0lu; w < W; w++) { - variance_buffer_ptr[c] += - (src_data[ch + w * src_stride] - mean_buffer_ptr[c]) * - (src_data[ch + w * src_stride] - mean_buffer_ptr[c]); - } - } - } - variance_buffer_ptr[c] = 1.f / sqrtf(variance_buffer_ptr[c] * size_inv + eps); - } - } - - tail_cb_start = 0; - if (mvn_kernel) { - tail_cb_start = CB; - - parallel_for2d(D, H, [&](size_t d, size_t h) { - for (size_t cb = 0; cb < CB; cb++) { - size_t src_off = is_nhwc ? ccb + d * H * W * C + h * W * C + cb * blk_size - : ccb + d * H * W * blk_size + h * W * blk_size + cb * D * H * W * blk_size; - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - auto variance_buffer_ptr = &variance_buffer[blk_size * cb]; - - auto arg = jit_mvn_call_args(); - arg.src = src_data + src_off; - arg.dst = dst_data + src_off; - arg.mean = mean_buffer_ptr; - arg.variance = variance_buffer_ptr; - arg.src_stride = src_stride * src_data_size; - arg.dst_stride = src_stride * dst_data_size; - arg.work_amount = static_cast(W); - arg.oc_off = cb * blk_size * sizeof(float); - (*mvn_kernel)(&arg); - } - }); - } + parallel_for2d(D, H, [&](size_t d, size_t h) { + for (size_t cb = 0; cb < CB; cb++) { + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; + auto variance_buffer_ptr = &variance_buffer[blk_size * cb]; - for (size_t cb = tail_cb_start; cb < tail_cb_end; cb++) { - size_t src_off = is_nhwc ? ccb + cb * blk_size : ccb + cb * C2; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - auto variance_buffer_ptr = &variance_buffer[blk_size * cb]; - - for (size_t c = 0lu; c < min_cb; c++) { - size_t cc = src_off + c; - - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * C0; - for (size_t w = 0lu; w < W; w++) { - float dst_value = (src_data[ch + w * src_stride] - mean_buffer_ptr[c]) * variance_buffer_ptr[c]; - if (!fusedWith.empty()) { - const auto &p = (*attr.get()).post_ops_; - for (int i = 0; i < p.len(); i++) { - auto &post_op = p.entry_[i]; - if (post_op.is_eltwise()) { - // only eltwise_relu supported - if (dst_value < 0) dst_value = 0; - } else if (post_op.is_depthwise()) { - // only ScaleShift supported - float scale = post_op.depthwise.weights_data[cb * blk_size + c]; - float shift = post_op.depthwise.biases_data[cb * blk_size + c]; - dst_value = dst_value * scale + shift; - } else if (post_op.is_quantization()) { - bool do_dequantization = post_op.quantization.alg == - alg_kind::quantization_quantize_dequantize; - bool do_rounding = do_dequantization || isFloatCompatible(output_prec) || - i != p.len() - 1; - - auto quant = post_op.quantization; - float crl = quant.crop_low_data->shifts_[quant.crop_low_data->count_ == 1 ? 0 : cb * blk_size + c]; - float crh = quant.crop_high_data->shifts_[quant.crop_high_data->count_ == 1 ? 0 : cb * blk_size + c]; - float isc = quant.input_scale_data->scales_[quant.input_scale_data->count_ == 1 ? 
0 : cb * blk_size + c]; - float ish = quant.input_shift_data->shifts_[quant.input_shift_data->count_ == 1 ? 0 : cb * blk_size + c]; - float osc = quant.output_scale_data->scales_[quant.output_scale_data->count_ == 1 ? 0 : cb * blk_size + c]; - float osh = quant.output_shift_data->shifts_[quant.output_shift_data->count_ == 1 ? 0 : cb * blk_size + c]; - - dst_value = nstl::min(crh, nstl::max(crl, dst_value)); - dst_value = dst_value * isc + ish; - - if (do_rounding) { - dst_value = roundf(dst_value); - } - - if (do_dequantization) { - dst_value = dst_value * osc + osh; - } - } - } - } - if (isFloatCompatible(output_prec)) { - dst_data[ch + w * src_stride] = dst_value; - } else if (output_prec == Precision::U8) { - dst_data[ch + w * src_stride] = (dst_value >= 0) ? lroundf(dst_value) : 0; - } else if (output_prec == Precision::I8) { - dst_data[ch + w * src_stride] = lroundf(dst_value); - } - } - } - } + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.dst = dst_data + src_offset * dst_data_size; + arg.mean = mean_buffer_ptr; + arg.variance = variance_buffer_ptr; + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_kernel)(&arg); } - } + }); } else { - tail_cb_start = 0; - if (mvn_kernel) { - tail_cb_start = CB; - - parallel_for2d(D, H, [&](size_t d, size_t h) { - for (size_t cb = 0; cb < CB; cb++) { - size_t src_off = is_nhwc ? ccb + d * H * W * C + h * W * C + cb * blk_size - : ccb + d * H * W * blk_size + h * W * blk_size + cb * D * H * W * blk_size; - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - - auto arg = jit_mvn_call_args(); - arg.src = src_data + src_off; - arg.dst = dst_data + src_off; - arg.mean = mean_buffer_ptr; - arg.src_stride = src_stride * src_data_size; - arg.dst_stride = src_stride * dst_data_size; - arg.work_amount = static_cast(W); - (*mvn_kernel)(&arg); - } - }); - } + // normalize_variance == false + parallel_for2d(D, H, [&](size_t d, size_t h) { + for (size_t cb = 0; cb < CB; cb++) { + size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size + : b_offset + cb * C2 + d * C1 + h * C0; + auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - for (size_t cb = tail_cb_start; cb < tail_cb_end; cb++) { - size_t src_off = is_nhwc ? ccb + cb * blk_size : ccb + cb * C2; - size_t min_cb = (std::min)(blk_size, C - cb * blk_size); - auto mean_buffer_ptr = &mean_buffer[blk_size * cb]; - - for (size_t c = 0lu; c < min_cb; c++) { - size_t cc = src_off + c; - - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * C0; - for (size_t w = 0lu; w < W; w++) { - float dst_value = src_data[ch + w * src_stride] - mean_buffer_ptr[c]; - if (isFloatCompatible(output_prec)) { - dst_data[ch + w * src_stride] = dst_value; - } else if (output_prec == Precision::U8) { - dst_data[ch + w * src_stride] = (dst_value >= 0) ? 
lroundf(dst_value) : 0; - } else if (output_prec == Precision::I8) { - dst_data[ch + w * src_stride] = lroundf(dst_value); - } - } - } - } + auto arg = jit_mvn_call_args(); + arg.src = src_data + src_offset * src_data_size; + arg.dst = dst_data + src_offset * dst_data_size; + arg.mean = mean_buffer_ptr; + arg.src_stride = src_stride_size; + arg.dst_stride = dst_stride_size; + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*mvn_kernel)(&arg); } - } + }); + } + } + } +} + +// Validates MVN node axes to check whether it can be executed on the current CPU implementation. +// Supported cases: +// 1D: axes: [0] +// 2D: axes: [1] +// 3D: axes: [1,2], [2] +// 4D: axes: [1,2,3], [2,3] +// 5D: axes: [1,2,3,4], [2,3,4] +bool MKLDNNMVNNode::checkAxesSuitability(const std::shared_ptr& node) { + const auto mvn = std::dynamic_pointer_cast(node); + if (mvn != nullptr && node->get_input_size() == 2) { + if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { + auto axesVal = axesNode->cast_vector(); + auto& mvnShape = mvn->get_output_shape(0); + if (mvnShape.size() == 1) { + if (axesVal.size() == 1 && axesVal[0] == 0) + return true; + else + return false; + } + if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) + return false; + int value = mvnShape.size() - 1; + for (int i = axesVal.size() - 1; i >= 0; i--, value--) { + if (axesVal[i] != value) + return false; } + return true; } } + return false; } bool MKLDNNMVNNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h index c5ed81a621df03..c114ba458c20fd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h @@ -17,10 +17,11 @@ struct jit_mvn_config_params { bool planar_layout; bool across_channels; bool normalize_variance; - mkldnn::memory::data_type src_dt; - mkldnn::memory::data_type dst_dt; + InferenceEngine::Precision src_prc; + InferenceEngine::Precision dst_prc; int src_data_size; int dst_data_size; + int C, D, H, W; }; struct jit_mvn_call_args { @@ -84,12 +85,14 @@ class MKLDNNMVNNode : public MKLDNNNode { return false; } + static bool checkAxesSuitability(const std::shared_ptr&); + private: - template - void mvn_pln(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims); + void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims); + + void mvn_blk(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims); - template - void mvn_blk(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims); + void mvn_ref(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims); void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false); @@ -98,6 +101,12 @@ class MKLDNNMVNNode : public MKLDNNNode { bool across_channels = false; bool normalize_variance = true; float eps = 1e-9f; + // Defines way to add epsilon: inside sqrt or outside. 
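
For reference, the two values of the epsType enum above only change where eps enters the variance normalization, matching the two branches visible in the mvn_pln/mvn_ref/mvn_blk hunks (insideSqrt adds eps under the square root, outsideSqrt adds it to the square root itself). A minimal standalone sketch, assuming a precomputed sum of squared deviations and an element-count inverse; the helper name and signature are illustrative and not taken from the patch:

    #include <cmath>

    // Sketch of the two epsilon modes used when normalize_variance is enabled.
    enum class EpsMode { insideSqrt, outsideSqrt };

    inline float inv_stddev(float sum_sq_dev, float count_inv, float eps, EpsMode mode) {
        const float var = sum_sq_dev * count_inv;  // variance estimate
        return mode == EpsMode::insideSqrt ? 1.f / std::sqrt(var + eps)    // insideSqrt
                                           : 1.f / (std::sqrt(var) + eps); // outsideSqrt
    }

When normalize_variance is false, neither mode is applied and the kernels only subtract the mean.
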
+ enum epsType { + insideSqrt, + outsideSqrt + }; + epsType epsMode_; InferenceEngine::Precision input_prec, output_prec; size_t src_data_size, dst_data_size; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 94eb01e2e0fb59..66132ae81bc794 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -1398,8 +1398,9 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d (*normalize_kernel)(&arg); }); } else { + std::vector fused_weight_modulo(weights_padding.size(), 0); for (size_t c = 0; c < C; c++) { - weights_padding[c] = weights_padding[c] * modulo_inv; + fused_weight_modulo[c] = weights_padding[c] * modulo_inv; } parallel_for2d(CB, H, [&](size_t cb, size_t h) { const in_data_t *src_data_b_cb_h = src_data_b + cb * H * W * blk_size + h * W * blk_size; @@ -1407,7 +1408,7 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d auto arg = jit_normalize_call_args(); arg.src = src_data_b_cb_h; arg.dst = dst_data_b_cb_h; - arg.fused_factor = static_cast(&weights_padding[cb * blk_size]); // load once + arg.fused_factor = static_cast(&fused_weight_modulo[cb * blk_size]); // load once arg.work_amount = static_cast(W); arg.oc_off = cb * blk_size * sizeof(float); (*normalize_kernel)(&arg); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp index 8725b38adb0be5..3215bfc8749e13 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp @@ -18,6 +18,8 @@ #include #include "ie_parallel.hpp" +#include + // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -1029,7 +1031,7 @@ void MKLDNNQuantizeNode::init() { float ih = inputHighData[isInputHighBroadcasted ? 
0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) - if ((il == ih && levels != 2) || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { + if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { THROW_IE_EXCEPTION << "Quantize layer with name '" << getName() << "' has invalid input quantize ranges: " << "inputLow = " << il << ", inputHigh = " << ih; } @@ -1578,6 +1580,33 @@ void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { isPostOpDataInitialized = true; } +bool MKLDNNQuantizeNode::isNeedToDecompose(const std::shared_ptr& node) { + if (const auto fq = std::dynamic_pointer_cast(node)) { + for (size_t i = 0; i < fq->get_input_size(); i++) { + if (fq->get_input_shape(i).size() > 5) + return true; + } + + for (size_t i = 1; i < fq->get_input_size(); i++) { + size_t count_not_unit_axis = 0; + auto shape = fq->get_input_shape(i); + + if (ngraph::shape_size(shape) != 1) { + size_t not_unit_axis = 0; + for (size_t i = 0; i < shape.size(); i++) { + if (shape[i] > 1) { + not_unit_axis = i; + count_not_unit_axis++; + } + } + if (count_not_unit_axis > 1 || not_unit_axis > 1) + return true; + } + } + } + return false; +} + bool MKLDNNQuantizeNode::created() const { return getType() == Quantize; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h index e3fff7f72ff20c..234fd103d8ae56 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -113,6 +113,8 @@ class MKLDNNQuantizeNode : public MKLDNNNode { void appendPostOps(mkldnn::post_ops& ops) override; + static bool isNeedToDecompose(const std::shared_ptr& node); + private: void init() override; std::vector getDataFormats() const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 00809fc42d792b..91e99f6d3fd97d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -491,11 +491,13 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { float roi_end_w_ = src_roi_ptr[3]; float roi_end_h_ = src_roi_ptr[4]; - float height_scale = ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1); - float width_scale = ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1); + float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0); + float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0); - float in_y = (oh * height_scale + roi_start_h_ * (jpp.ih - 1)); - float in_x = (ow * width_scale + roi_start_w_ * (jpp.iw - 1)); + float in_y = (jpp.pooled_h > 1 ? (oh * height_scale + roi_start_h_ * (jpp.ih - 1)) : + 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1)); + float in_x = (jpp.pooled_w > 1 ? 
(ow * width_scale + roi_start_w_ * (jpp.iw - 1)) : + 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1)); if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) { if (roi_pooling_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index 0c9a79324ce862..df711ca58e8861 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -167,7 +167,8 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrec); dataSize = MKLDNNExtensionUtils::sizeOfDataType(dataType); - bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1; + bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1 && + !getParentEdgeAt(DATA_ID)->getParent()->isConstant(); InferenceEngine::LayerConfig config; config.dynBatchSupport = false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 083f9bb94a9e08..ad7426d1dae45e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,6 +6,7 @@ #include "common/cpu_memcpy.h" #include #include +#include #include #include #include @@ -80,7 +81,7 @@ void MKLDNNSplitNode::getSupportedDescriptors() { void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { using TensorDescFactory = std::function; constexpr size_t channelsPos = 1lu; - // perform guard checks + if (!supportedPrimitiveDescriptors.empty()) return; @@ -218,6 +219,16 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); } + + // Special nspc -> ncsp case when splitting channels + if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { + auto plain = makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); + auto perChannel = makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); + + plain.getConfig().inConfs[0].desc = perChannel.getConfig().inConfs[0].desc; + + supportedPrimitiveDescriptors.push_back(plain); + } } void MKLDNNSplitNode::createPrimitive() { @@ -231,23 +242,49 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - if (!isOptimized()) - prepareOptimizedParams(); + canUseOptimizedNspc2Ncsp = true; + if (axis != 1) + canUseOptimizedNspc2Ncsp = false; + + if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC && + getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC) + canUseOptimizedNspc2Ncsp = false; + + for (size_t i = 0; i < getChildEdges().size(); i++) { + if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW && + getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW) + canUseOptimizedNspc2Ncsp = false; + } + + if (!isOptimized()) { + initializeDstMemPtrs(); + if (!canUseOptimizedNspc2Ncsp) + prepareOptimizedParams(); + } } void MKLDNNSplitNode::execute(mkldnn::stream 
strm) { if (isOptimized()) return; + if (dstMemPtrs.empty()) + THROW_ERROR << "Output data pointers have not been initialized."; + int MB = batchToProcess(); + + if (canUseOptimizedNspc2Ncsp) { + optimizedNspc2Ncsp(MB); + return; + } + uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); size_t batch = this->getParentEdgeAt(0)->getDims()[0]; if (batch != MB) optimizedParams.countStrides = optimizedParams.countStrides / batch * MB; - parallel_for2d(this->getChildEdges().size(), optimizedParams.countStrides, [&](size_t i, size_t j) { - uint8_t* dstData = optimizedParams.dstMemPtrs[i]; + parallel_for2d(dstMemPtrs.size(), optimizedParams.countStrides, [&](size_t i, size_t j) { + uint8_t* dstData = dstMemPtrs[i]; cpu_memcpy(&dstData[j * optimizedParams.dataSize[i]], &srcData[optimizedParams.srcDataOffsets[i] + j * optimizedParams.srcDataStride], @@ -346,7 +383,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { inNum = 0; } if (MKLDNNExtensionUtils::initTensorsAreEqual( - getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[0].desc, + supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc, parent_spd->getConfig().outConfs[inNum].desc)) { canSelectPrimitive.push_back(i); } @@ -364,6 +401,46 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { } } + // if there are no inPlace, but more than one suitable configurations, select the one that matches the output layout + for (auto indx : canSelectPrimitive) { + bool outputDescFullMatch = true; + for (size_t i = 0; i < getChildEdges().size(); ++i) { + auto childEdge = getChildEdgeAt(i); + auto childPtr = childEdge->getChild(); + auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors(); + const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc; + + if (!vecChildSpd.empty()) { + int inNum = childEdge->getOutputNum(); + if (inNum < 0) { + inNum = 0; + } + bool hasMatchDesc = false; + for (auto& childSpd : vecChildSpd) { + if (inNum >= childSpd.getConfig().inConfs.size()) { + inNum = 0; + } + if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) { + hasMatchDesc = true; + break; + } + } + if (!hasMatchDesc) { + outputDescFullMatch = false; + break; + } + } + } + if (outputDescFullMatch) { + selectPrimitiveDescriptorByIndex(static_cast(indx)); + return; + } + } + if (!canSelectPrimitive.empty()) { + selectPrimitiveDescriptorByIndex(static_cast(canSelectPrimitive.front())); + return; + } + // if there are no matching data layouts, select first optimized implementation for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) { if (supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) { @@ -384,50 +461,118 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) { void MKLDNNSplitNode::prepareOptimizedParams() { const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + const auto outputPortsCount = outDims.size(); //find axis order position const auto& order = inpTensorDesc.getBlockingDesc().getOrder(); - unsigned axisOrderPos = UINT_MAX; + unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { axisOrderPos = i; break; } } - if (UINT_MAX == axisOrderPos) { + if (std::numeric_limits::max() == axisOrderPos) { THROW_ERROR << "Can't find the axis in the input tensor order list"; } uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); const auto& srcDims = 
inpTensorDesc.getBlockingDesc().getBlockDims(); - int nDims = srcDims.size(); + const auto nDims = srcDims.size(); optimizedParams.countStrides = 1; for (int i = 0; i < axisOrderPos; i++) optimizedParams.countStrides *= srcDims[i]; optimizedParams.srcDataStride = 0; - optimizedParams.dataSize.resize(this->getChildEdges().size()); - optimizedParams.dstMemPtrs.clear(); - for (int i = 0; i < this->getChildEdges().size(); i++) { - if (uint8_t* dstData = reinterpret_cast(this->getChildEdgeAt(i)->getMemoryPtr()->GetPtr())) { - optimizedParams.dstMemPtrs.push_back(dstData); - } else { - THROW_ERROR << "can't get child edge indx " << i << "data."; - } + optimizedParams.dataSize.resize(outputPortsCount); + for (size_t i = 0; i < outputPortsCount; i++) { + auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - for (int j = axisOrderPos; j < nDims; j++) - optimizedParams.dataSize[i] *= this->getChildEdgeAt(i)->getDesc().getBlockingDesc().getBlockDims()[j]; + for (size_t j = axisOrderPos; j < nDims; j++) + optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j]; optimizedParams.srcDataStride += optimizedParams.dataSize[i]; } - optimizedParams.srcDataOffsets.resize(this->getChildEdges().size()); + optimizedParams.srcDataOffsets.resize(outputPortsCount); optimizedParams.srcDataOffsets[0] = 0; - for (int i = 1; i < this->getChildEdges().size(); i++) { + for (size_t i = 1; i < outputPortsCount; i++) { optimizedParams.srcDataOffsets[i] = optimizedParams.srcDataOffsets[i - 1] + optimizedParams.dataSize[i - 1]; } } +void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { + auto parentEdge = getParentEdgeAt(0); + const int ndims = parentEdge->getDims().ndims(); + const size_t IC = parentEdge->getDims()[1]; + const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; + const size_t H = parentEdge->getDims()[ndims - 2]; + const size_t W = parentEdge->getDims()[ndims - 1]; + + auto srcBlob = parentEdge->getBlob(); + auto srcData = srcBlob->cbuffer().as(); + const auto dataSize = srcBlob->getTensorDesc().getPrecision().size(); + + const size_t DHW = D*H*W; + const size_t strideIB = DHW * IC * dataSize; + const size_t strideIW = IC*dataSize; + const size_t strideOC = DHW * dataSize; + + for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) { + auto dstData = dstMemPtrs[i]; + + size_t innerSize = 1; + auto dims = outDims[i].ToSizeVector(); + + for (size_t j = axis; j < dims.size(); j++) { + innerSize *= dims[j]; + } + auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize; + + const size_t OC = dims[1]; + const size_t strideOB = OC * strideOC; + + parallel_for2d(MB, DHW, [&](size_t b, size_t j) { + auto localSrcPtr = srcPtr + b*strideIB + j*strideIW; + auto localDstPtr = dstData + b*strideOB + j*dataSize; + for (size_t c = 0; c < OC; c++) { + cpu_memcpy(localDstPtr, localSrcPtr, dataSize); + localSrcPtr += dataSize; + localDstPtr += strideOC; + } + }); + + sIdx += innerSize; + } +} + +void MKLDNNSplitNode::initializeDstMemPtrs() { + dstMemPtrs.clear(); + + //Here we have to place the output data pointers in the order that reflects the output edges order. + //It's important in case when several edges are connected to one port. + //This is a naive implementation, an indexed priority queue or modified treap would be a more elegant solution. 
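
As a compact illustration of the ordering described in the comments above, the map / counting-sort / copy_if sequence that follows keeps one entry per unique destination pointer, placed by the last child-edge index that references it, and skips unused slots. The function name and signature below are illustrative only, a sketch of the same steps rather than the patch code itself:

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <unordered_map>
    #include <vector>

    // Sketch: deduplicate destination pointers shared by several child edges
    // while preserving output-port order.
    std::vector<uint8_t*> orderUniqueDstPtrs(const std::vector<uint8_t*>& edgePtrs) {
        std::unordered_map<uint8_t*, size_t> lastIndexOf;  // pointer -> last edge index
        for (size_t i = 0; i < edgePtrs.size(); ++i)
            lastIndexOf[edgePtrs[i]] = i;

        std::vector<uint8_t*> slots(edgePtrs.size(), nullptr);  // "counting sort" by edge index
        for (const auto& item : lastIndexOf)
            slots[item.second] = item.first;

        std::vector<uint8_t*> ordered;
        std::copy_if(slots.begin(), slots.end(), std::back_inserter(ordered),
                     [](const uint8_t* p) { return p != nullptr; });  // drop empty slots
        return ordered;
    }
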
+ std::unordered_map mapDstPtrs; + for (size_t i = 0; i < getChildEdges().size(); ++i) { + auto outputEdge = this->getChildEdgeAt(i); + if (uint8_t* dstData = reinterpret_cast(outputEdge->getMemoryPtr()->GetPtr())) { + mapDstPtrs[dstData] = i; + } else { + THROW_ERROR << "can't get child edge indx " << i << "data."; + } + } + + std::vector vecCountingSort(getChildEdges().size(), nullptr); + for (auto& item : mapDstPtrs) { + vecCountingSort[item.second] = item.first; + } + + dstMemPtrs.reserve(vecCountingSort.size()); + auto backInserter = std::back_inserter(dstMemPtrs); + std::copy_if(vecCountingSort.begin(), vecCountingSort.end(), backInserter, [](const uint8_t* x) {return x;}); + dstMemPtrs.shrink_to_fit(); +} + REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h index b7813dd3714c24..af546860f39726 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -29,13 +29,17 @@ class MKLDNNSplitNode : public MKLDNNNode { private: void prepareOptimizedParams(); + void initializeDstMemPtrs(); + void optimizedNspc2Ncsp(size_t MB); + + bool canUseOptimizedNspc2Ncsp; size_t axis = 1; + std::vector dstMemPtrs; struct { std::vector dataSize; std::vector srcDataOffsets; - std::vector dstMemPtrs; size_t srcDataStride; size_t countStrides; } optimizedParams; diff --git a/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp b/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp index 51541de57cd2ce..dabff00462e53a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/bfloat16.hpp @@ -81,11 +81,12 @@ class jit_emu_vcvtneps2bf16 : public jit_emitter { prepare_table(); }; - size_t get_inputs_num() { return 1; }; + size_t get_inputs_num() override { return 1; }; private: void emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs, - const std::vector& pool_vec_idxs, const std::vector& pool_gpr_idxs) { + const std::vector& pool_vec_idxs, const std::vector& pool_gpr_idxs, + const emitter_context *emit_context) override { if (host_isa_ == mkldnn::impl::cpu::x64::cpu_isa_t::avx512_common) { Xbyak::Zmm in = Xbyak::Zmm(in_vec_idxs[0]); Xbyak::Ymm out = Xbyak::Ymm(out_vec_idxs[0]); @@ -110,7 +111,7 @@ class jit_emu_vcvtneps2bf16 : public jit_emitter { return ((output) << (4 * (input))); } - void register_table_entries() { + void register_table_entries() override { enum { fixup_input_code_qnan_ = 0, fixup_input_code_snan_ = 1, @@ -133,7 +134,7 @@ class jit_emu_vcvtneps2bf16 : public jit_emitter { push_arg_entry_of("selector", selector_int32, true); } - size_t aux_vecs_count() const { return 2; } + size_t aux_vecs_count() const override { return 2; } }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/multi_device/multi_device_async_infer_request.cpp b/inference-engine/src/multi_device/multi_device_async_infer_request.cpp index 434d137657c74d..845f6aac2d9485 100644 --- a/inference-engine/src/multi_device/multi_device_async_infer_request.cpp +++ b/inference-engine/src/multi_device/multi_device_async_infer_request.cpp @@ -39,8 +39,7 @@ MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest( _multiDeviceExecutableNetwork->_thisPreferredDeviceName = ""; // if any 
input is remote (e.g. was set with SetBlob), let' use the corresponding device for (const auto &it : _multiDeviceExecutableNetwork->GetInputsInfo()) { - Blob::Ptr b; - _inferRequest->GetBlob(it.first.c_str(), b); + auto b = _inferRequest->GetBlob(it.first); auto r = b->as(); if (r) { const auto name = r->getDeviceName(); @@ -87,9 +86,9 @@ void MultiDeviceAsyncInferRequest::Infer_ThreadUnsafe() { InferUsingAsync(); } -void MultiDeviceAsyncInferRequest::GetPerformanceCounts(std::map &perfMap) const { - CheckBusy(); - perfMap = std::move(_perfMap); +std::map MultiDeviceAsyncInferRequest::GetPerformanceCounts() const { + CheckState(); + return std::move(_perfMap); } MultiDeviceAsyncInferRequest::~MultiDeviceAsyncInferRequest() { diff --git a/inference-engine/src/multi_device/multi_device_async_infer_request.hpp b/inference-engine/src/multi_device/multi_device_async_infer_request.hpp index acc4ded20fc1bb..715f2d8ded2ecc 100644 --- a/inference-engine/src/multi_device/multi_device_async_infer_request.hpp +++ b/inference-engine/src/multi_device/multi_device_async_infer_request.hpp @@ -26,7 +26,7 @@ class MultiDeviceAsyncInferRequest : public InferenceEngine::AsyncInferRequestTh const MultiDeviceExecutableNetwork::Ptr& multiDeviceExecutableNetwork, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); void Infer_ThreadUnsafe() override; - void GetPerformanceCounts(std::map &_perfMap) const override; + std::map GetPerformanceCounts() const override; ~MultiDeviceAsyncInferRequest() override; protected: diff --git a/inference-engine/src/multi_device/multi_device_exec_network.cpp b/inference-engine/src/multi_device/multi_device_exec_network.cpp index 2a5c50182bea15..13114e5faa1a0e 100644 --- a/inference-engine/src/multi_device/multi_device_exec_network.cpp +++ b/inference-engine/src/multi_device/multi_device_exec_network.cpp @@ -201,7 +201,7 @@ IInferRequest::Ptr MultiDeviceExecutableNetwork::CreateInferRequest() { _needPerfCounters, std::static_pointer_cast(shared_from_this()), _callbackExecutor); - asyncRequest.reset(new InferRequestBase(asyncTreadSafeImpl), [](IInferRequest *p) { p->Release(); }); + asyncRequest.reset(new InferRequestBase(asyncTreadSafeImpl), [](IInferRequest *p) { p->Release(); }); asyncTreadSafeImpl->SetPointerToPublicInterface(asyncRequest); return asyncRequest; } diff --git a/inference-engine/src/multi_device/multi_device_infer_request.cpp b/inference-engine/src/multi_device/multi_device_infer_request.cpp index a662cc711346af..a4f0128748bcf7 100644 --- a/inference-engine/src/multi_device/multi_device_infer_request.cpp +++ b/inference-engine/src/multi_device/multi_device_infer_request.cpp @@ -45,18 +45,16 @@ MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkI void MultiDeviceInferRequest::SetBlobsToAnotherRequest(InferRequest& req) { for (const auto &it : _networkInputs) { - Blob::Ptr blob; auto &name = it.first; // this request is already in BUSY state, so using the internal functions safely - GetBlob(name.c_str(), blob); + auto blob = GetBlob(name); if (req.GetBlob(name) != blob) req.SetBlob(name, blob); } for (const auto &it : _networkOutputs) { - Blob::Ptr blob; auto &name = it.first; // this request is already in BUSY state, so using the internal functions safely - GetBlob(name.c_str(), blob); + auto blob = GetBlob(name); if (req.GetBlob(name) != blob) req.SetBlob(name, blob); } diff --git a/inference-engine/src/multi_device/multi_device_infer_request.hpp b/inference-engine/src/multi_device/multi_device_infer_request.hpp index 
ef20b93b6caebb..b7c674e272c6c2 100644 --- a/inference-engine/src/multi_device/multi_device_infer_request.hpp +++ b/inference-engine/src/multi_device/multi_device_infer_request.hpp @@ -25,7 +25,7 @@ class MultiDeviceInferRequest : public InferenceEngine::InferRequestInternal { explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs, InferenceEngine::InferRequest request_to_share_blobs_with); - void GetPerformanceCounts(std::map&) const override { + std::map GetPerformanceCounts() const override { THROW_IE_EXCEPTION_WITH_STATUS(NOT_IMPLEMENTED); } void InferImpl() override { diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp index 6a649de3bc3368..d659c912b6c753 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -24,18 +25,17 @@ namespace InferenceEngine { /** * @brief Executable network `noexcept` wrapper which accepts IExecutableNetworkInternal derived instance which can throw exceptions * @ingroup ie_dev_api_exec_network_api - * @tparam T Minimal CPP implementation of IExecutableNetworkInternal (e.g. ExecutableNetworkInternal) - */ -template + */ class ExecutableNetworkBase : public IExecutableNetwork { - std::shared_ptr _impl; +protected: + std::shared_ptr _impl; public: /** * @brief Constructor with actual underlying implementation. * @param impl Underlying implementation of type IExecutableNetworkInternal */ - explicit ExecutableNetworkBase(std::shared_ptr impl) { + explicit ExecutableNetworkBase(std::shared_ptr impl) { if (impl.get() == nullptr) { THROW_IE_EXCEPTION << "implementation not defined"; } @@ -77,7 +77,7 @@ class ExecutableNetworkBase : public IExecutableNetwork { if (idx >= v.size()) { return OUT_OF_BOUNDS; } - pState = std::make_shared>(v[idx]); + pState = std::make_shared(v[idx]); return OK; } catch (const std::exception& ex) { return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what(); @@ -91,11 +91,6 @@ class ExecutableNetworkBase : public IExecutableNetwork { delete this; } - /// @private Need for unit tests only - TODO: unit tests should test using public API, non having details - const std::shared_ptr getImpl() const { - return _impl; - } - StatusCode SetConfig(const std::map& config, ResponseDesc* resp) noexcept override { TO_STATUS(_impl->SetConfig(config)); } @@ -112,8 +107,8 @@ class ExecutableNetworkBase : public IExecutableNetwork { TO_STATUS(pContext = _impl->GetContext()); } -private: - ~ExecutableNetworkBase() = default; +protected: + ~ExecutableNetworkBase() override = default; }; /** @@ -127,7 +122,7 @@ template inline typename InferenceEngine::ExecutableNetwork make_executable_network(std::shared_ptr impl) { // to suppress warning about deprecated QueryState IE_SUPPRESS_DEPRECATED_START - typename ExecutableNetworkBase::Ptr net(new ExecutableNetworkBase(impl), [](IExecutableNetwork* p) { + typename ExecutableNetworkBase::Ptr net(new ExecutableNetworkBase(impl), [](IExecutableNetwork* p) { p->Release(); }); IE_SUPPRESS_DEPRECATED_END diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp 
index 75f2cc32184f37..fc751fb0308cb2 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp @@ -11,27 +11,25 @@ #include "cpp_interfaces/exception2status.hpp" #include "cpp_interfaces/plugin_itt.hpp" #include +#include #include "ie_iinfer_request.hpp" #include "ie_preprocess.hpp" -#include "ie_profiling.hpp" namespace InferenceEngine { /** * @brief Inference request `noexcept` wrapper which accepts IAsyncInferRequestInternal derived instance which can throw exceptions * @ingroup ie_dev_api_async_infer_request_api - * @tparam T Minimal CPP implementation of IAsyncInferRequestInternal (e.g. AsyncInferRequestThreadSafeDefault) */ -template class InferRequestBase : public IInferRequest { - std::shared_ptr _impl; + std::shared_ptr _impl; public: /** * @brief Constructor with actual underlying implementation. * @param impl Underlying implementation of type IAsyncInferRequestInternal */ - explicit InferRequestBase(std::shared_ptr impl): _impl(impl) {} + explicit InferRequestBase(std::shared_ptr impl): _impl(impl) {} StatusCode Infer(ResponseDesc* resp) noexcept override { OV_ITT_SCOPED_TASK(itt::domains::Plugin, "Infer"); @@ -45,7 +43,7 @@ class InferRequestBase : public IInferRequest { StatusCode GetPerformanceCounts(std::map& perfMap, ResponseDesc* resp) const noexcept override { - TO_STATUS(_impl->GetPerformanceCounts(perfMap)); + TO_STATUS(perfMap = _impl->GetPerformanceCounts()); } StatusCode SetBlob(const char* name, const Blob::Ptr& data, ResponseDesc* resp) noexcept override { @@ -57,11 +55,11 @@ class InferRequestBase : public IInferRequest { } StatusCode GetBlob(const char* name, Blob::Ptr& data, ResponseDesc* resp) noexcept override { - TO_STATUS(_impl->GetBlob(name, data)); + TO_STATUS(data = _impl->GetBlob(name)); } StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc *resp) const noexcept override { - TO_STATUS(_impl->GetPreProcess(name, info)); + TO_STATUS(*info = &(_impl->GetPreProcess(name))); } StatusCode StartAsync(ResponseDesc* resp) noexcept override { @@ -101,7 +99,7 @@ class InferRequestBase : public IInferRequest { if (idx >= v.size()) { return OUT_OF_BOUNDS; } - pState = std::make_shared>(v[idx]); + pState = std::make_shared(v[idx]); return OK; } catch (const std::exception& ex) { return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what(); diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp index 9f1763e7d05c0c..4222f560ccaef8 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp @@ -16,19 +16,17 @@ IE_SUPPRESS_DEPRECATED_START /** * @brief Default implementation for IVariableState - * @tparam T Minimal CPP implementation of IVariableStateInternal (e.g. VariableStateInternal) - * @ingroup ie_dev_api_variable_state_api + * @ingroup ie_dev_api_variable_state_api */ -template class VariableStateBase : public IVariableState { - std::shared_ptr impl; + std::shared_ptr impl; public: /** * @brief Constructor with actual underlying implementation. 
* @param impl Underlying implementation of type IVariableStateInternal */ - explicit VariableStateBase(std::shared_ptr impl): impl(impl) { + explicit VariableStateBase(std::shared_ptr impl): impl(impl) { if (impl == nullptr) { THROW_IE_EXCEPTION << "VariableStateBase implementation is not defined"; } diff --git a/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp b/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp index e319a5f8cb2952..4ec96091e9a226 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp @@ -141,6 +141,12 @@ namespace InferenceEngine { */ #define NOT_ALLOCATED_str std::string("[NOT_ALLOCATED] ") +/** + * @def INFER_CANCELLED_str + * @brief Defines the `infer cancelled` message + */ +#define INFER_CANCELLED_str std::string("[INFER_CANCELLED] ") + /** * @} */ diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp index 3bd2baa94550b4..3cfd1ddad5a7cc 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp @@ -39,7 +39,7 @@ class ExecutableNetworkThreadSafeAsyncOnly : public ExecutableNetworkInternal, auto asyncRequestImpl = this->CreateAsyncInferRequestImpl(_networkInputs, _networkOutputs); asyncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this()); - asyncRequest.reset(new InferRequestBase(asyncRequestImpl), [](IInferRequest* p) { + asyncRequest.reset(new InferRequestBase(asyncRequestImpl), [](IInferRequest* p) { p->Release(); }); asyncRequestImpl->SetPointerToPublicInterface(asyncRequest); diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp index 0885efbc653482..6b41edf84db01e 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp @@ -69,7 +69,7 @@ class ExecutableNetworkThreadSafeDefault : public ExecutableNetworkInternal, auto asyncThreadSafeImpl = std::make_shared( syncRequestImpl, _taskExecutor, _callbackExecutor); - asyncRequest.reset(new InferRequestBase(asyncThreadSafeImpl), + asyncRequest.reset(new InferRequestBase(asyncThreadSafeImpl), [](IInferRequest *p) { p->Release(); }); asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest); diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp index dcb4d20e9f2433..0d88c6db1b48f4 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp @@ -41,20 +41,25 @@ namespace InferenceEngine { * @snippet example_async_infer_request.cpp async_infer_request:define_pipeline */ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { - using AtomicCallback = std::atomic; + enum InferState {Idle, Busy, Canceled, 
Stop}; using Futures = std::vector>; using Promise = std::shared_ptr>; enum Stage_e : std::uint8_t { executor, task }; InferRequestInternal::Ptr _syncRequest; + friend struct DisableCallbackGuard; struct DisableCallbackGuard { - explicit DisableCallbackGuard(AtomicCallback& callback) - : _callbackRef(callback), _callback(callback.exchange(nullptr)) {} + explicit DisableCallbackGuard(AsyncInferRequestThreadSafeDefault* this_) + : _this{this_} { + std::lock_guard lock{_this->_mutex}; + std::swap(_callback, _this->_callback); + } ~DisableCallbackGuard() { - _callbackRef = _callback; + std::lock_guard lock{_this->_mutex}; + _this->_callback = _callback; } - AtomicCallback& _callbackRef; - IInferRequest::CompletionCallback _callback; + AsyncInferRequestThreadSafeDefault* _this = nullptr; + IInferRequest::CompletionCallback _callback = nullptr; }; struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor { @@ -63,6 +68,63 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { IStreamsExecutor::Ptr _streamsExecutor; }; + template + void InferImpl(const F& f) { + _syncRequest->checkBlobs(); + InferState state = InferState::Idle; + { + std::lock_guard lock{_mutex}; + state = _state; + switch (_state) { + case InferState::Busy : + THROW_IE_EXCEPTION_WITH_STATUS(REQUEST_BUSY); + case InferState::Canceled : + THROW_IE_EXCEPTION_WITH_STATUS(INFER_CANCELLED); + case InferState::Idle : { + _futures.erase(std::remove_if(std::begin(_futures), std::end(_futures), + [](const std::shared_future& future) { + if (future.valid()) { + return (std::future_status::ready == + future.wait_for(std::chrono::milliseconds {0})); + } else { + return true; + } + }), + _futures.end()); + _promise = {}; + _futures.emplace_back(_promise.get_future().share()); + } break; + case InferState::Stop : break; + } + _state = InferState::Busy; + } + if (state != InferState::Stop) { + try { + f(); + } catch (...) 
{ + _promise.set_exception(std::current_exception()); + std::lock_guard lock{_mutex}; + _state = InferState::Idle; + throw; + } + } + } + +protected: + /** + * @brief Throws exception if inference request is busy or canceled + */ + void CheckState() const { + std::lock_guard lock {_mutex}; + switch (_state) { + case InferState::Busy : + THROW_IE_EXCEPTION_WITH_STATUS(REQUEST_BUSY); + case InferState::Canceled : + THROW_IE_EXCEPTION_WITH_STATUS(INFER_CANCELLED); + default: break; + } + } + public: /** * @brief A shared pointer to AsyncInferRequestThreadSafeDefault @@ -84,11 +146,11 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { _syncRequest {request}, _requestExecutor {taskExecutor}, _callbackExecutor {callbackExecutor}, - _pipeline {{taskExecutor, [this] {_syncRequest->Infer();}}}, - _syncPipeline {{std::make_shared(), [this] {_syncRequest->Infer();}}} { + _pipeline {{taskExecutor, [this] {_syncRequest->InferImpl();}}}, + _syncPipeline {{std::make_shared(), [this] {_syncRequest->InferImpl();}}} { auto streamsExecutor = std::dynamic_pointer_cast(taskExecutor); if (streamsExecutor != nullptr) { - _syncPipeline = {{std::make_shared(std::move(streamsExecutor)), [this] {_syncRequest->Infer();}}}; + _syncPipeline = {{std::make_shared(std::move(streamsExecutor)), [this] {_syncRequest->InferImpl();}}}; } } @@ -107,8 +169,9 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { */ StatusCode Wait(int64_t millis_timeout) override { if (millis_timeout < IInferRequest::WaitMode::RESULT_READY) { - THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str + "Timeout can't be less " - << IInferRequest::WaitMode::RESULT_READY << " for InferRequest::Wait\n"; + THROW_IE_EXCEPTION_WITH_STATUS(PARAMETER_MISMATCH) + << " Timeout can't be less " + << IInferRequest::WaitMode::RESULT_READY << " for InferRequest::Wait\n"; } auto status = std::future_status::deferred; @@ -144,68 +207,57 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { } void StartAsync() override { - if (setIsRequestBusy(true)) ThrowBusy(); - try { - StartAsync_ThreadUnsafe(); - } catch (...) { - setIsRequestBusy(false); - throw; - } + InferImpl([&] {StartAsync_ThreadUnsafe();}); } void Infer() override { - if (setIsRequestBusy(true)) ThrowBusy(); - try { - Infer_ThreadUnsafe(); - } catch (...) 
{ - setIsRequestBusy(false); - throw; - } - setIsRequestBusy(false); + DisableCallbackGuard disableCallbackGuard{this}; + InferImpl([&] {Infer_ThreadUnsafe();}); + Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); } - void GetPerformanceCounts(std::map& perfMap) const override { - CheckBusy(); - _syncRequest->GetPerformanceCounts(perfMap); + std::map GetPerformanceCounts() const override { + CheckState(); + return _syncRequest->GetPerformanceCounts(); } - void SetBlob(const char* name, const Blob::Ptr& data) override { - CheckBusy(); + void SetBlob(const std::string& name, const Blob::Ptr& data) override { + CheckState(); _syncRequest->SetBlob(name, data); } - void SetBlob(const char* name, const Blob::Ptr& data, const PreProcessInfo& info) override { - CheckBusy(); + void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info) override { + CheckState(); _syncRequest->SetBlob(name, data, info); } - void GetBlob(const char* name, Blob::Ptr& data) override { - CheckBusy(); - _syncRequest->GetBlob(name, data); + Blob::Ptr GetBlob(const std::string& name) override { + CheckState(); + return _syncRequest->GetBlob(name); } - void GetPreProcess(const char* name, const PreProcessInfo** info) const override { - _syncRequest->GetPreProcess(name, info); + const PreProcessInfo& GetPreProcess(const std::string& name) const override { + return _syncRequest->GetPreProcess(name); } void SetBatch(int batch) override { - CheckBusy(); + CheckState(); _syncRequest->SetBatch(batch); }; void GetUserData(void** data) override { - CheckBusy(); + CheckState(); if (data == nullptr) THROW_IE_EXCEPTION << NOT_ALLOCATED_str; *data = _userData; } void SetUserData(void* data) override { - CheckBusy(); + CheckState(); _userData = data; } void SetCompletionCallback(IInferRequest::CompletionCallback callback) override { - CheckBusy(); + CheckState(); _callback = callback; } @@ -222,12 +274,21 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { return _syncRequest->QueryState(); } + void ThrowIfCanceled() const { + std::lock_guard lock{_mutex}; + if (_state == InferState::Canceled) { + THROW_IE_EXCEPTION_WITH_STATUS(INFER_CANCELLED); + } + } + StatusCode Cancel() override { - StatusCode status = Wait(IInferRequest::WaitMode::STATUS_ONLY); - if (status == INFER_NOT_STARTED) { - return status; + std::lock_guard lock{_mutex}; + if (_state == InferState::Idle) { + return StatusCode::INFER_NOT_STARTED; + } else { + _state = InferState::Canceled; + return InferenceEngine::OK; } - return _syncRequest->Cancel(); } protected: @@ -240,37 +301,6 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { */ using Pipeline = std::vector; - /** - * @brief Determines if request busy. - * @return `True` if request busy, `false` otherwise. - */ - bool isRequestBusy() const { - return _isRequestBusy; - } - - /** - * @brief Sets the is request busy. - * @param[in] isBusy Indicates if busy - * @return `True` is case of success, `false` otherwise. - */ - bool setIsRequestBusy(bool isBusy) { - return _isRequestBusy.exchange(isBusy); - } - - /** - * @brief Throws an exception that an inference request is busy. 
- */ - [[noreturn]] static void ThrowBusy() { - THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::REQUEST_BUSY << REQUEST_BUSY_str; - } - - /** - * @brief Checks whether an inference request is busy and calls ThrowBusy if `true` - */ - void CheckBusy() const { - if (isRequestBusy()) ThrowBusy(); - } - /** * @brief Creates and run the first stage task. If destructor was not called add a new std::future to the * AsyncInferRequestThreadSafeDefault::_futures list that would be used to wait @@ -281,36 +311,9 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { */ void RunFirstStage(const Pipeline::iterator itBeginStage, const Pipeline::iterator itEndStage, const ITaskExecutor::Ptr callbackExecutor = {}) { - _promise = {}; - bool stop = [&] { - std::lock_guard lock(_mutex); - if (!_stop) { - _futures.erase(std::remove_if(std::begin(_futures), std::end(_futures), - [](const std::shared_future& future) { - if (future.valid()) { - return (std::future_status::ready == - future.wait_for(std::chrono::milliseconds {0})); - } else { - return true; - } - }), - _futures.end()); - - _futures.emplace_back(_promise.get_future().share()); - } - return _stop; - }(); - - if (!stop) { - try { - auto& firstStageExecutor = std::get(*itBeginStage); - IE_ASSERT(nullptr != firstStageExecutor); - firstStageExecutor->run(MakeNextStageTask(itBeginStage, itEndStage, std::move(callbackExecutor))); - } catch (...) { - _promise.set_exception(std::current_exception()); - throw; - } - } + auto& firstStageExecutor = std::get(*itBeginStage); + IE_ASSERT(nullptr != firstStageExecutor); + firstStageExecutor->run(MakeNextStageTask(itBeginStage, itEndStage, std::move(callbackExecutor))); } /** @@ -320,38 +323,25 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { */ void StopAndWait() { _callback = nullptr; + Futures futures; + InferState state = InferState::Idle; { - std::lock_guard lock(_mutex); - if (!_stop) { - _stop = true; - for (auto&& future : _futures) { - if (future.valid()) { - future.wait(); - } + std::lock_guard lock{_mutex}; + state = _state; + if (state != InferState::Stop) { + _state = InferState::Stop; + futures = std::move(_futures); + } + } + if (state != InferState::Stop) { + for (auto&& future : futures) { + if (future.valid()) { + future.wait(); } } } } - /** - * @brief Implements Infer() using StartAsync() and Wait() - */ - void InferUsingAsync() { - DisableCallbackGuard disableCallbackGuard{_callback}; - StartAsync_ThreadUnsafe(); - Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); - } - - /** - * @brief Implements Infer() using synchronous pipeline and Wait() - */ - void InferUsingSync() { - DisableCallbackGuard disableCallbackGuard{_callback}; - _syncRequest->checkBlobs(); - RunFirstStage(_syncPipeline.begin(), _syncPipeline.end(), _syncCallbackExecutor); - // If we have exception we should extract it from future using Wait() method - Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); - } ITaskExecutor::Ptr _requestExecutor; //!< Used to run inference CPU tasks. ITaskExecutor::Ptr _callbackExecutor; //!< Used to run post inference callback in asynchronous pipline @@ -364,7 +354,6 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { * @note Used by StartAsync which ensures thread-safety and calls this method after. 
*/ virtual void StartAsync_ThreadUnsafe() { - _syncRequest->checkBlobs(); RunFirstStage(_pipeline.begin(), _pipeline.end(), _callbackExecutor); } @@ -373,7 +362,14 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { * @note Used by Infer which ensures thread-safety and calls this method after. */ virtual void Infer_ThreadUnsafe() { - InferUsingSync(); + RunFirstStage(_syncPipeline.begin(), _syncPipeline.end(), _syncCallbackExecutor); + } + + /** + * @brief Implements Infer() using StartAsync() and Wait() + */ + void InferUsingAsync() { + StartAsync_ThreadUnsafe(); } private: @@ -418,22 +414,25 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { if ((itEndStage == itNextStage) || (nullptr != localCurrentException)) { auto lastStageTask = [this, requestStatus, localCurrentException]() mutable { auto promise = std::move(_promise); - auto callback = _callback.load(); - if (setIsRequestBusy(false)) { - if (nullptr != callback) { - InferenceEngine::CurrentException() = localCurrentException; - try { - callback(_publicInterface, requestStatus); - } catch (...) { - localCurrentException = std::current_exception(); - } - InferenceEngine::CurrentException() = nullptr; - } - if (nullptr == localCurrentException) { - promise.set_value(); - } else { - promise.set_exception(localCurrentException); + IInferRequest::CompletionCallback callback = nullptr; + { + std::lock_guard lock{_mutex}; + _state = InferState::Idle; + callback = _callback; + } + if (nullptr != callback) { + InferenceEngine::CurrentException() = localCurrentException; + try { + callback(_publicInterface, requestStatus); + } catch (...) { + localCurrentException = std::current_exception(); } + InferenceEngine::CurrentException() = nullptr; + } + if (nullptr == localCurrentException) { + promise.set_value(); + } else { + promise.set_exception(localCurrentException); } }; @@ -446,13 +445,12 @@ class AsyncInferRequestThreadSafeDefault : public IAsyncInferRequestInternal { }, std::move(callbackExecutor)); } - std::atomic_bool _isRequestBusy = {false}; void* _userData = nullptr; - AtomicCallback _callback = {nullptr}; + IInferRequest::CompletionCallback _callback = nullptr; IInferRequest::Ptr _publicInterface; std::promise _promise; mutable std::mutex _mutex; Futures _futures; - bool _stop = false; + InferState _state = InferState::Idle; }; } // namespace InferenceEngine diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_request_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_request_internal.hpp index 9bb32e22b30f87..daf00a4e089bc6 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_request_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_request_internal.hpp @@ -76,9 +76,9 @@ class InferRequestInternal : virtual public IInferRequestInternal { * @param data - a reference to input or output blob. The type of Blob must correspond to the network input * precision and size. 
*/ - void SetBlob(const char* name, const Blob::Ptr& userBlob) override { + void SetBlob(const std::string& name, const Blob::Ptr& userBlob) override { OV_ITT_SCOPED_TASK(itt::domains::Plugin, "SetBlob"); - if (name == nullptr) { + if (name.empty()) { THROW_IE_EXCEPTION << NOT_FOUND_str + "Failed to set blob with empty name"; } if (!userBlob) THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'"; @@ -142,12 +142,13 @@ class InferRequestInternal : virtual public IInferRequestInternal { /** * @brief Given optional implementation of getting blob to avoid need for it to be implemented by plugin * @param name - a name of input or output blob. - * @param data - a reference to input or output blob. The type of Blob must correspond to the network input + * @return Returns input or output blob. The type of Blob must correspond to the network input * precision and size. * @note if ROI blob was previously set it is returned (without dimensions checks) instead of default blob. */ - void GetBlob(const char* name, Blob::Ptr& data) override { + Blob::Ptr GetBlob(const std::string& name) override { OV_ITT_SCOPED_TASK(itt::domains::Plugin, "GetBlob"); + Blob::Ptr data; InputInfo::Ptr foundInput; DataPtr foundOutput; const SizeVector oneVector = { 1 }; @@ -163,10 +164,10 @@ class InferRequestInternal : virtual public IInferRequestInternal { ? foundInput->getTensorDesc().getDims() : oneVector); - if (auto devBlob = _deviceInputs[name]) { - if (preProcessingRequired(foundInput, data, devBlob)) { - addInputPreProcessingFor(name, data, devBlob); - } + auto& devBlob = _deviceInputs[name]; + if (preProcessingRequired(foundInput, data, devBlob)) { + // if no devBlob, performs inplace + addInputPreProcessingFor(name, data, devBlob ? devBlob : _inputs[name]); } } } else { @@ -176,6 +177,7 @@ class InferRequestInternal : virtual public IInferRequestInternal { ? foundOutput->getTensorDesc().getDims() : oneVector); } + return data; } /** @@ -184,7 +186,7 @@ class InferRequestInternal : virtual public IInferRequestInternal { * @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size. * @param info Preprocess info for blob. */ - void SetBlob(const char* name, const Blob::Ptr& data, const PreProcessInfo& info) override { + void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info) override { InputInfo::Ptr foundInput; DataPtr foundOutput; if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { @@ -199,13 +201,13 @@ class InferRequestInternal : virtual public IInferRequestInternal { /** * @brief Gets pre-process for input data * @param name Name of input blob. 
- * @param info pointer to a pointer to PreProcessInfo structure + * @return Returns constant reference to PreProcessInfo structure */ - void GetPreProcess(const char* name, const PreProcessInfo** info) const override { + const PreProcessInfo& GetPreProcess(const std::string& name) const override { InputInfo::Ptr foundInput; DataPtr foundOutput; if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { - *info = &foundInput->getPreProcess(); + return foundInput->getPreProcess(); } else { THROW_IE_EXCEPTION << "Output blob can't have pre-processing"; } @@ -283,7 +285,7 @@ class InferRequestInternal : virtual public IInferRequestInternal { * @throws [parameter_mismatch] exception if input and output has the same name * @throws [not_found] exception if there is no input and output layers with given name */ - bool findInputAndOutputBlobByName(const char* name, InputInfo::Ptr& foundInput, DataPtr& foundOutput) const { + bool findInputAndOutputBlobByName(const std::string& name, InputInfo::Ptr& foundInput, DataPtr& foundOutput) const { foundInput = nullptr; foundOutput = nullptr; if (_networkOutputs.empty()) { @@ -412,7 +414,7 @@ class InferRequestInternal : virtual public IInferRequestInternal { auto& preproc_ptr = ppDataIt->second; preproc_ptr->isApplicable(from, to); // Stores the given blob as ROI blob. It will be used to fill in network input - // during pre-processing + // during pre-processingstd::map preproc_ptr->setRoiBlob(from); } }; diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp index a15165fb283dd6..5ffa03e403e617 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp @@ -47,9 +47,9 @@ class IInferRequestInternal { /** * @brief Queries performance measures per layer to get feedback of what is the most time consuming layer. * Note: not all plugins may provide meaningful data - * @param perfMap - a map of layer names to profiling information for that layer. + * @return Returns a map of layer names to profiling information for that layer. */ - virtual void GetPerformanceCounts(std::map& perfMap) const = 0; + virtual std::map GetPerformanceCounts() const = 0; /** * @brief Set input/output data to infer @@ -58,16 +58,16 @@ class IInferRequestInternal { * @param data - a reference to input or output blob. The type of Blob must correspond to the network input * precision and size. */ - virtual void SetBlob(const char* name, const Blob::Ptr& data) = 0; + virtual void SetBlob(const std::string& name, const Blob::Ptr& data) = 0; /** * @brief Get input/output data to infer * @note Memory allocation doesn't happen * @param name - a name of input or output blob. - * @param data - a reference to input or output blob. The type of Blob must correspond to the network input + * @return Returns input or output blob. The type of Blob must correspond to the network input * precision and size. */ - virtual void GetBlob(const char* name, Blob::Ptr& data) = 0; + virtual Blob::Ptr GetBlob(const std::string& name) = 0; /** * @brief Sets pre-process for input data @@ -75,14 +75,14 @@ class IInferRequestInternal { * @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size. * @param info Preprocess info for blob. 
*/ - virtual void SetBlob(const char* name, const Blob::Ptr& data, const PreProcessInfo& info) = 0; + virtual void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info) = 0; /** * @brief Gets pre-process for input data * @param name Name of input blob. - * @param info pointer to a pointer to PreProcessInfo structure + * @return Returns constant reference to PreProcessInfo structure */ - virtual void GetPreProcess(const char* name, const PreProcessInfo** info) const = 0; + virtual const PreProcessInfo& GetPreProcess(const std::string& name) const = 0; /** * @brief Sets new batch size when dynamic batching is enabled in executable network that created this request. diff --git a/inference-engine/src/plugin_api/ie_ngraph_utils.hpp b/inference-engine/src/plugin_api/ie_ngraph_utils.hpp index 22cd621cd47572..aa3e40b08db779 100644 --- a/inference-engine/src/plugin_api/ie_ngraph_utils.hpp +++ b/inference-engine/src/plugin_api/ie_ngraph_utils.hpp @@ -71,7 +71,7 @@ inline ::ngraph::element::Type convertPrecision(const std::string& precision) { return ::ngraph::element::Type(::ngraph::element::Type_t::i32); } else if (precision == "i64" || precision == "I64") { return ::ngraph::element::Type(::ngraph::element::Type_t::i64); - } else if (precision == "u1" || precision == "U1") { + } else if (precision == "u1" || precision == "U1" || precision == "BIN" || precision == "bin") { return ::ngraph::element::Type(::ngraph::element::Type_t::u1); } else if (precision == "u8" || precision == "U8") { return ::ngraph::element::Type(::ngraph::element::Type_t::u8); diff --git a/inference-engine/src/plugin_api/ie_profiling.hpp b/inference-engine/src/plugin_api/ie_profiling.hpp deleted file mode 100644 index 1fa34cef89ee2a..00000000000000 --- a/inference-engine/src/plugin_api/ie_profiling.hpp +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief [DEPRECATED] Defines API to profile your plugin using Intel VTune. - * @details This API is still available but deprecated. Use plugin_itt.hpp instead. - * @file ie_profiling.hpp - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * @cond - */ - -#ifdef ENABLE_PROFILING_ITT -#include -#endif - -namespace InferenceEngine { - -template -void annotateBegin(Static&& static_, Block&& block_); - -template -void annotateEnd(Static&& static_, Block&& block_); - -template -struct Annotate { - struct Static_ { - template - struct idx {}; - - template - struct idx : idx {}; - - template - struct idx<0, S...> { - using type = idx; - }; - - template - Static_(ArgTuple&& arg_tuple, idx): static_ {std::get(std::forward(arg_tuple))...} {} - - template - explicit Static_(ArgTuple&& arg_tuple) - : Static_ {std::forward(arg_tuple), typename idx::value>::type {}} {} - - Static static_; - }; - - static Static_ static_; - - Block block_; - - Annotate(const Annotate&) = delete; - Annotate& operator=(const Annotate&) = delete; - Annotate(Annotate&&) = default; - Annotate& operator=(Annotate&&) = default; - - template - inline explicit Annotate(Ts&&... 
xs): block_ {std::forward(xs)...} { - annotateBegin(static_.static_, block_); - } - - inline ~Annotate() { - annotateEnd(static_.static_, block_); - } -}; - -template -typename Annotate::Static_ Annotate::static_(Local::staticArgs()); - -#define IE_ANNOTATE_CONCAT(x, y) IE_ANNOTATE_CONCAT_EVAL(x, y) -#define IE_ANNOTATE_CONCAT_EVAL(x, y) x##y - -#define IE_ANNOTATE_UNPACK(tuple) IE_ANNOTATE_UNPACK_EVAL tuple -#define IE_ANNOTATE_UNPACK_EVAL(...) __VA_ARGS__ - -#define IE_ANNOTATE_MAKE_NAME(lib_name, postfix) \ - IE_ANNOTATE_CONCAT(IE_ANNOTATE_CONCAT(IE_ANNOTATE_CONCAT(__intel_util_annotate_, lib_name), postfix), __LINE__) - -#define IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _ctx) -#define IE_ANNOTATE_VARIABLE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _variable) -#define IE_ANNOTATE_FUNC_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _func) - -#define IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \ - struct IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) \ - : ::InferenceEngine::Annotate { \ - using ::InferenceEngine::Annotate::Annotate; \ - static auto staticArgs() -> decltype(std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple))) { \ - return std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple)); \ - } \ - } - -#define IE_ANNOTATE_MAKE_SCOPE(lib_name, static_type, block_type, make_static_args_tuple, make_block_args_tuple) \ - IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \ - IE_ANNOTATE_VARIABLE_NAME(lib_name) {IE_ANNOTATE_UNPACK(make_block_args_tuple)}; - -#ifdef ENABLE_PROFILING_ITT -struct IttTaskHandles { - __itt_domain* const domain; - __itt_string_handle* const handle; - - explicit IttTaskHandles(const char* taskName) - : domain {__itt_domain_create("InferenceEngine")}, handle {__itt_string_handle_create(taskName)} {} -}; - -struct IttBlock {}; - -inline static void annotateBegin(IttTaskHandles& h, IttBlock&) { - __itt_task_begin(h.domain, __itt_null, __itt_null, h.handle); -} - -inline static void annotateEnd(IttTaskHandles& h, IttBlock&) { - __itt_task_end(h.domain); -} - -#define IE_ITT_SCOPE(task_name) \ - IE_ANNOTATE_MAKE_SCOPE(InferenceEngineItt, ::InferenceEngine::IttTaskHandles, ::InferenceEngine::IttBlock, \ - (task_name), ()) -#else -#define IE_ITT_SCOPE(task_name) -#endif - -#define IE_STR(x) IE_STR_(x) -#define IE_STR_(x) #x - -struct ProfilingTask; - -struct IttStatic {}; - -struct IttProfilingTask { - ProfilingTask* t; -}; - -static void annotateBegin(IttStatic&, IttProfilingTask& t); -static void annotateEnd(IttStatic&, IttProfilingTask& t); - -/** - * @class ProfilingTask - * @ingroup ie_dev_profiling - * @brief Used to annotate section of code which would be named at runtime - */ -struct ProfilingTask { - ProfilingTask() = default; - //! 
@private - ProfilingTask(const ProfilingTask&) = default; - - ProfilingTask& operator =(const ProfilingTask&) = default; - - /** - * @brief Construct ProfilingTask with runtime defined name - */ - inline explicit ProfilingTask(const std::string& taskName) - : name(taskName) -#ifdef ENABLE_PROFILING_ITT - , - domain(__itt_domain_create("InferenceEngine")), - handle(__itt_string_handle_create(taskName.c_str())) -#endif - { - } - -private: - friend void annotateBegin(IttStatic&, IttProfilingTask& t); - friend void annotateEnd(IttStatic&, IttProfilingTask& t); - - std::string name; -#ifdef ENABLE_PROFILING_ITT - __itt_domain* domain; - __itt_string_handle* handle; -#endif -}; - -inline static void annotateBegin(IttStatic&, IttProfilingTask& t) { -#ifdef ENABLE_PROFILING_ITT - __itt_task_begin(t.t->domain, __itt_null, __itt_null, t.t->handle); -#else - (void)t; -#endif -} - -inline static void annotateEnd(IttStatic&, IttProfilingTask& t) { -#ifdef ENABLE_PROFILING_ITT - __itt_task_end(t.t->domain); -#else - (void)t; -#endif -} - -#ifdef ENABLE_PROFILING_ITT -#define IE_ITT_TASK_SCOPE(profilingTask) \ - IE_ANNOTATE_MAKE_SCOPE(InferenceEngineIttScopeTask, ::InferenceEngine::IttStatic, \ - ::InferenceEngine::IttProfilingTask, (), (&(profilingTask))) -#else -#define IE_ITT_TASK_SCOPE(profiling_task) -#endif - -inline static void annotateSetThreadName(const char* name) { -#ifdef ENABLE_PROFILING_ITT - __itt_thread_set_name(name); -#else - (void)(name); -#endif -} - -/** - * @def IE_PROFILING_AUTO_SCOPE(NAME) - * @ingroup ie_dev_profiling - * @brief Annotate section of code till scope exit to be profiled using known at compile time @p NAME as section id - * @param NAME Known at compile time name of section of code that is passed to profiling back end - */ -#define IE_PROFILING_AUTO_SCOPE(NAME) IE_ITT_SCOPE(IE_STR(NAME)); - - -/** - * @def IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) - * @ingroup ie_dev_profiling - * @brief Annotate section of code till scope exit to be profiled runtime configured variable of ProfilingTask type. - * ProfilingTask::name will be used as section id. 
- * @param PROFILING_TASK variable of ProfilingTask type - */ -#define IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) IE_ITT_TASK_SCOPE(PROFILING_TASK); -} // namespace InferenceEngine - -/** - * @endcond - */ diff --git a/inference-engine/src/preprocessing/ie_preprocess_data.hpp b/inference-engine/src/preprocessing/ie_preprocess_data.hpp index 969141a9d3e9ac..f0e351f09bff59 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_data.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_data.hpp @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi.hpp index de953f46c4ff51..d2aaf707bfaf7a 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi.hpp @@ -13,7 +13,6 @@ #include #include #include -#include "ie_profiling.hpp" #include // FIXME: Move this definition back to ie_preprocess_data, diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index 05a48664f523fc..c8bf3a03a7e0d9 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -601,14 +601,14 @@ void V10Parser::parsePreProcess(CNNNetwork& network, const pugi::xml_node& root, } V10Parser::GenericLayerParams V10Parser::XmlDeserializer::parseGenericParams(const pugi::xml_node& node) { - const auto parsePort = [](const pugi::xml_node& parentNode, + const auto parsePort = [this](const pugi::xml_node& parentNode, const GenericLayerParams& params, bool input) -> GenericLayerParams::LayerPortData { GenericLayerParams::LayerPortData port; port.portId = GetIntAttr(parentNode, "id"); - for (auto node = parentNode.child("dim"); !node.empty(); node = node.next_sibling("dim")) { + FOREACH_CHILD(node, parentNode, "dim") { size_t dim = 0; const pugi::char_t* dimVal = node.child_value(); std::stringstream ss(dimVal); @@ -626,6 +626,21 @@ V10Parser::GenericLayerParams V10Parser::XmlDeserializer::parseGenericParams(con type = InferenceEngine::details::convertPrecision(preStr); } port.precision = type; + std::vector names; + if (getParameters(parentNode, "names", names)) { + for (size_t i = 0; i < names.size(); i++) { + std::string name = names[i]; + // Restore original name if it contains delimiter + // getParameters(...) 
returns the vector of names which were split by delimiter ',' + // but some names can contain ',' as a part of name, in this case we use '\' to escape delimiter + // the cycle below is needed in order to find names which contained delimiter and restore the original name + while (i < names.size() && names[i].at(names[i].length() - 1) == '\\') { + name.replace(names[i].length() - 1, 1, ","); + name += names[++i]; + } + port.names.emplace(name); + } + } return port; }; GenericLayerParams params; @@ -662,22 +677,7 @@ std::shared_ptr V10Parser::XmlDeserializer::createNode( const pugi::xml_node& node, const Blob::CPtr& weights, const GenericLayerParams& params) { - static const InferenceEngine::details::caseless_unordered_map> creators = { - { "ReorgYolo", std::make_shared>("ReorgYolo") }, - { "PSROIPooling", std::make_shared>("PSROIPooling") }, - { "VariadicSplit", std::make_shared>("VariadicSplit") }, - }; - - // Check that operation in default opsets - auto isDefaultOpSet = [](const std::string& version) -> bool { - static char const * prefix = "opset"; - static size_t const prefixLen = strlen(prefix); - return version.length() == prefixLen + 1 - && version.compare(0, prefixLen, prefix) == 0 - && version[prefixLen] >= '1' - && version[prefixLen] <= '6'; - }; - + // Check that inputs are correctly defined for (size_t i = 0; i < inputs.size(); i++) { if (!inputs[i].get_node()) THROW_IE_EXCEPTION << params.type << " layer " << params.name << " with id: " << params.layerId @@ -692,23 +692,7 @@ std::shared_ptr V10Parser::XmlDeserializer::createNode( // Find registerd opset auto opsetIt = opsets.find(params.version); - if (isDefaultOpSet(params.version)) { - // Try to create operation from creators - auto creatorIt = creators.find(params.type); - if (creatorIt != creators.end()) { - auto const & creator = creatorIt->second; - // Check that opset isn't registered - // or opset should contains the same version of operation - // or doesn't contain operation with current type - if (opsetIt == opsets.end() - || opsetIt->second.contains_type(creator->getNodeType()) - || !opsetIt->second.contains_type(params.type)) - ngraphNode = creator->createLayer(inputs, node, weights, params); - } - } - // Try to create operation from loaded opsets - auto version = params.version; static const std::unordered_set experimental_detectrons = {"ExperimentalDetectronDetectionOutput", "ExperimentalDetectronGenerateProposalsSingleImage", "ExperimentalDetectronPriorGridGenerator", @@ -716,27 +700,27 @@ std::shared_ptr V10Parser::XmlDeserializer::createNode( "ExperimentalDetectronTopKROIs"}; if (experimental_detectrons.count(params.type)) { - version = "opset6"; + opsetIt = opsets.find("opset6"); } - if (!ngraphNode && opsets.count(version)) { - auto opset = opsets.at(version); + if (!ngraphNode && opsetIt != opsets.end()) { auto const & type = params.type == "Const" ? 
"Constant" : params.type; if (params.version == "opset1") { - // MVN and ROIPooling were missing in opset1 - if (type == "MVN" || type == "ROIPooling") { + // MVN, ROIPooling and ReorgYolo were missing in opset1 + if (type == "MVN" || type == "ROIPooling" || type == "ReorgYolo") { opsetIt = opsets.find("opset2"); if (opsetIt == opsets.end()) { THROW_IE_EXCEPTION << "Cannot create " << params.type << " layer " << params.name << " id:" << params.layerId << " from unsupported opset: " << params.version; } - opset = opsetIt->second; } } + auto const& opset = opsetIt->second; + ngraphNode = std::shared_ptr(opset.create_insensitive(type)); if (!ngraphNode) { THROW_IE_EXCEPTION << "Opset " << params.version << " doesn't contain the operation with type: " << type; @@ -823,70 +807,10 @@ std::shared_ptr V10Parser::XmlDeserializer::createNode( } ngraphNode->set_friendly_name(params.name); + for (size_t i = 0; i < params.outputPorts.size() && i < ngraphNode->get_output_size(); ++i) { + if (!params.outputPorts[i].names.empty()) + ngraphNode->get_output_tensor(i).set_names(params.outputPorts[i].names); + } return ngraphNode; } - -namespace InferenceEngine { -// VariadicSplit layer -template <> -std::shared_ptr V10Parser::LayerCreator::createLayer( - const ngraph::OutputVector& inputs, const pugi::xml_node& node, const Blob::CPtr& weights, - const GenericLayerParams& layerParsePrms) { - checkParameters(inputs, layerParsePrms, 3); - return std::make_shared(inputs[0], inputs[1], inputs[2]); -} - -// DepthToSpace layer -template <> -std::shared_ptr V10Parser::LayerCreator::createLayer( - const ngraph::OutputVector& inputs, const pugi::xml_node& node, const Blob::CPtr& weights, - const GenericLayerParams& layerParsePrms) { - checkParameters(inputs, layerParsePrms, 1); - pugi::xml_node dn = node.child("data"); - - if (dn.empty()) - THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name; - - return std::make_shared(inputs[0], GetStrAttr(dn, "mode"), GetIntAttr(dn, "block_size", 1)); -} - -// ReorgYolo layer -template <> -std::shared_ptr V10Parser::LayerCreator::createLayer( - const ngraph::OutputVector& inputs, const pugi::xml_node& node, const Blob::CPtr& weights, - const GenericLayerParams& layerParsePrms) { - checkParameters(inputs, layerParsePrms, 1); - pugi::xml_node dn = node.child("data"); - - if (dn.empty()) - THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name; - - auto stride = GetUIntAttr(dn, "stride"); - return std::make_shared(inputs[0], ngraph::Strides {stride}); -} - -// PSROIPooling layer -template <> -std::shared_ptr V10Parser::LayerCreator::createLayer( - const ngraph::OutputVector& inputs, const pugi::xml_node& node, const Blob::CPtr& weights, - const GenericLayerParams& layerParsePrms) { - checkParameters(inputs, layerParsePrms, 2); - pugi::xml_node dn = node.child("data"); - - if (dn.empty()) - THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name; - - auto output_dim = GetIntAttr(dn, "output_dim"); - auto group_size = GetIntAttr(dn, "group_size", 1); - auto spatial_bins_x = GetIntAttr(dn, "spatial_bins_x", 1); - auto spatial_bins_y = GetIntAttr(dn, "spatial_bins_y", 1); - auto spatial_scale = GetFloatAttr(dn, "spatial_scale"); - auto mode = GetStrAttr(dn, "mode", "average"); - - return std::make_shared(inputs[0], inputs[1], - output_dim, group_size, spatial_scale, spatial_bins_x, - spatial_bins_y, mode); -} - -} // 
namespace InferenceEngine diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp index a97872faabeee0..43d1c4000038a4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp @@ -69,6 +69,7 @@ class V10Parser : public IParser { // Precision and dimensions are needed only for GenericIE op ngraph::element::Type_t precision; SizeVector dims; + std::unordered_set names; }; size_t layerId; std::string version; @@ -355,4 +356,4 @@ class V10Parser : public IParser { #endif // IR_READER_V10 -} // namespace InferenceEngine \ No newline at end of file +} // namespace InferenceEngine diff --git a/inference-engine/src/readers/ir_reader_v7/ie_format_parser.cpp b/inference-engine/src/readers/ir_reader_v7/ie_format_parser.cpp index cc0310d70c33a4..90016b9790c205 100644 --- a/inference-engine/src/readers/ir_reader_v7/ie_format_parser.cpp +++ b/inference-engine/src/readers/ir_reader_v7/ie_format_parser.cpp @@ -286,7 +286,7 @@ CNNNetworkImplPtr FormatParser::Parse(pugi::xml_node& root) { std::vector inputLayers; int nodeCnt = 0; std::map layerById; - for (auto node = allLayersNode.child("layer"); !node.empty(); node = node.next_sibling("layer")) { + FOREACH_CHILD(node, allLayersNode, "layer") { LayerParseParameters lprms; ParseGenericParams(node, lprms); @@ -578,7 +578,7 @@ void FormatParser::SetWeights(const TBlob::Ptr& weights) { } void FormatParser::ParseDims(SizeVector& dims, const pugi::xml_node& parentNode) const { - for (auto node = parentNode.child("dim"); !node.empty(); node = node.next_sibling("dim")) { + FOREACH_CHILD(node, parentNode, "dim") { unsigned int dim = 0; const pugi::char_t* dimVal = node.child_value(); stringstream ss(dimVal); diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/hswish_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/hswish_fusion.hpp index a1460443f56e5f..cf6ecb25d15053 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/hswish_fusion.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/hswish_fusion.hpp @@ -17,7 +17,9 @@ class TRANSFORMATIONS_API HSwishFusion; class TRANSFORMATIONS_API HSwishFusionWithReluDiv; class TRANSFORMATIONS_API HSwishFusionWithReluMul; class TRANSFORMATIONS_API HSwishFusionWithoutRelu; -class TRANSFORMATIONS_API HSwishFusionWithClamp; +class TRANSFORMATIONS_API HSwishFusionWithClampMul; +class TRANSFORMATIONS_API HSwishFusionWithClampDiv; +class TRANSFORMATIONS_API HSwishFusionWithHSigmoidMul; } // namespace pass @@ -57,10 +59,30 @@ class ngraph::pass::HSwishFusionWithoutRelu: public ngraph::pass::MatcherPass { * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces a sub-graph x * (Clamp(x + 3, 0, 6) * const(1/6)) with a HSwish op. */ -class ngraph::pass::HSwishFusionWithClamp: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishFusionWithClampMul: public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; - HSwishFusionWithClamp(); + HSwishFusionWithClampMul(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief HSwishFusion transformation replaces a sub-graph x * (Clamp(x + 3, 0, 6) / 6) with a HSwish op. 
+ */ +class ngraph::pass::HSwishFusionWithClampDiv: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + HSwishFusionWithClampDiv(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief HSwishFusion transformation replaces a sub-graph x * HSigmoid(x) with a HSwish op. + */ +class ngraph::pass::HSwishFusionWithHSigmoidMul: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + HSwishFusionWithHSigmoidMul(); }; /** @@ -74,6 +96,8 @@ class ngraph::pass::HSwishFusion: public ngraph::pass::GraphRewrite { add_matcher(); add_matcher(); add_matcher(); - add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); } -}; \ No newline at end of file +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp index 7845d835cd54b9..531bacc8ded626 100644 --- a/inference-engine/src/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp +++ b/inference-engine/src/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp @@ -19,7 +19,6 @@ namespace ngraph { namespace pass { class TRANSFORMATIONS_API BatchNormDecomposition; -class TRANSFORMATIONS_API BatchNormV5Decomposition; } // namespace pass } // namespace ngraph @@ -29,9 +28,3 @@ class ngraph::pass::BatchNormDecomposition: public ngraph::pass::MatcherPass { NGRAPH_RTTI_DECLARATION; BatchNormDecomposition(); }; - -class ngraph::pass::BatchNormV5Decomposition: public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - BatchNormV5Decomposition(); -}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp new file mode 100644 index 00000000000000..cb545cba8ef27e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API FakeQuantizeDecomposition; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief FakeQuantizeDecomposition transformation decomposes FakeQuantize layer. + * + * Expression from specification: + * if x <= min(input_low, input_high): + * output = output_low + * elif x > max(input_low, input_high): + * output = output_high + * else: + * output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low + * + * expand brackets into round: + * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) + * div on (levels-1) and mult on (output_high - output_low) => mult on (output_high - output_low) / (levels-1) + * + * => + * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + output_low + * + * This transformation doesn't support following cases: + * 1. At least one 'range' input is not Constant + * 2. 
At least one 'input_low' input value greater or equal than 'input_high' input value + * + */ + +class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + FakeQuantizeDecomposition(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp index 2604830755fdd2..f1b5eb7abe6f9b 100644 --- a/inference-engine/src/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp +++ b/inference-engine/src/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp @@ -18,7 +18,7 @@ namespace pass { /** * @ingroup ie_transformation_common_api * @brief MVN6Decomposition transformation into sub-graph x - ReduceMean(x, axes) if normalize_variance is false and - * into sub-graph (x - ReduceMean(x, axes)) / Sqrt(ReduceSum((x - ReduceMean(x, axes)) ^ 2)) if normalize_variance is true. + * into sub-graph (x - ReduceMean(x, axes)) / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) if normalize_variance is true. */ class ngraph::pass::MVN6Decomposition : public ngraph::pass::MatcherPass { public: diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index d1a25a7f00fb27..3adefade0bd002 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -51,6 +51,7 @@ #include "transformations/op_conversions/convert_previous_nms_to_nms_5.hpp" #include "transformations/op_conversions/hsigmoid_decomposition.hpp" #include "transformations/op_conversions/log_softmax_decomposition.hpp" +#include "transformations/op_conversions/mvn6_decomposition.hpp" #include #include @@ -63,6 +64,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); + manager.register_pass(); manager.register_pass(); // Resolves dynamism (replaces NonZero), CF needed // TODO: move to KMB @@ -83,7 +85,6 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); common_fusions->add_matcher(); common_fusions->add_matcher(); - //common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); @@ -115,7 +116,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); decomp->add_matcher(); decomp->add_matcher(); - decomp->add_matcher(); + decomp->add_matcher(); decomp->set_name("ngraph::pass::CommonDecompositions"); // CF is required after all decompositions diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp index e4225bdcb15367..f6f05dc08b1bca 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp @@ -164,7 +164,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvAddFusion, "ConvAddFusion", 0); ngraph::pass::ConvAddFusion::ConvAddFusion() { MATCHER_SCOPE(ConvAddFusion); auto conv = 
ngraph::pattern::wrap_type(pattern::consumers_count(1)); - auto add = ngraph::pattern::wrap_type({conv, std::make_shared()}); + auto add = ngraph::pattern::wrap_type({conv, pattern::any_input()}); matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { return conv_callback(m); @@ -179,7 +179,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvMultiplyFusion, "ConvMultiplyFusion", 0 ngraph::pass::ConvMultiplyFusion::ConvMultiplyFusion() { MATCHER_SCOPE(ConvMultiplyFusion); auto conv = ngraph::pattern::wrap_type(pattern::consumers_count(1)); - auto add = ngraph::pattern::wrap_type({conv, std::make_shared()}); + auto add = ngraph::pattern::wrap_type({conv, pattern::any_input()}); matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { return conv_callback(m); @@ -194,7 +194,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::DeconvAddFusion, "DeconvAddFusion", 0); ngraph::pass::DeconvAddFusion::DeconvAddFusion() { MATCHER_SCOPE(DeconvAddFusion); auto conv = ngraph::pattern::wrap_type(pattern::consumers_count(1)); - auto add = ngraph::pattern::wrap_type({conv, std::make_shared()}); + auto add = ngraph::pattern::wrap_type({conv, pattern::any_input()}); matcher_pass_callback callback = [](ngraph::pattern::Matcher &m){ return conv_callback(m); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp index b396f51895b33d..bdf73138793033 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -76,6 +76,11 @@ ngraph::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() { ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto pattern_map = m.get_pattern_value_map(); + + if (transformation_callback(m.get_match_root())) { + return false; + } + auto data = pattern_map[data_pattern]; auto input_low = pattern_map[input_low_pattern]; auto input_high = pattern_map[input_high_pattern]; diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/hswish_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/hswish_fusion.cpp index 0bb0815eaf54da..4d17ca440ff387 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/hswish_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/hswish_fusion.cpp @@ -8,7 +8,7 @@ #include -#include +#include #include #include @@ -20,22 +20,22 @@ ngraph::pass::HSwishFusionWithReluDiv::HSwishFusionWithReluDiv() { MATCHER_SCOPE(HSwishFusionWithReluDiv); // Replaces a sub-graph (x * (min(Relu(x + 3), 6)) / 6 with a HSwish op. 
auto input = ngraph::pattern::any_input(); - auto add_constant = ngraph::pattern::wrap_type(); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::pattern::wrap_type(); - auto min = std::make_shared(relu, min_constant); - auto mul = std::make_shared(input, min); - auto div_constant = ngraph::pattern::wrap_type(); - auto div = std::make_shared(mul, div_constant); + auto add_constant = ngraph::pattern::wrap_type(); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::pattern::wrap_type(); + auto min = std::make_shared(relu, min_constant); + auto mul = std::make_shared(input, min); + auto div_constant = ngraph::pattern::wrap_type(); + auto div = std::make_shared(mul, div_constant); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { auto &pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) && op::util::has_constant_value(min_const_value, 6.0) @@ -45,7 +45,7 @@ ngraph::pass::HSwishFusionWithReluDiv::HSwishFusionWithReluDiv() { return false; } - auto hswish = std::make_shared(x_output); + auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), @@ -72,22 +72,22 @@ ngraph::pass::HSwishFusionWithReluMul::HSwishFusionWithReluMul() { MATCHER_SCOPE(HSwishFusionWithReluMul); // Replaces a sub-graph (x * (min(Relu(x + 3), 6)) * const(1/6) with a HSwish op. 
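These matchers are rarely invoked one by one; they are bundled and executed through a pass manager, as in the CommonOptimizations pipeline touched earlier in this patch. A minimal usage sketch, assuming the umbrella HSwishFusion pass exposed by this transformation's public header:

// Sketch: applying the HSwish fusions to a function. The pass and header names
// follow the usual transformation layout and are assumptions here.
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/hswish_fusion.hpp>

void apply_hswish_fusions(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::HSwishFusion>();  // GraphRewrite bundling the matchers above
    manager.run_passes(f);
}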
auto input = ngraph::pattern::any_input(); - auto add_constant = ngraph::pattern::wrap_type(); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::pattern::wrap_type(); - auto min = std::make_shared(relu, min_constant); - auto mul_first = std::make_shared(input, min); - auto mul_constant = ngraph::pattern::wrap_type(); - auto mul_second = std::make_shared(mul_first, mul_constant); + auto add_constant = ngraph::pattern::wrap_type(); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::pattern::wrap_type(); + auto min = std::make_shared(relu, min_constant); + auto mul_first = std::make_shared(input, min); + auto mul_constant = ngraph::pattern::wrap_type(); + auto mul_second = std::make_shared(mul_first, mul_constant); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { auto &pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) && op::util::has_constant_value(min_const_value, 6.0f) @@ -97,7 +97,7 @@ ngraph::pass::HSwishFusionWithReluMul::HSwishFusionWithReluMul() { return false; } - auto hswish = std::make_shared(x_output); + auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), @@ -124,24 +124,24 @@ ngraph::pass::HSwishFusionWithoutRelu::HSwishFusionWithoutRelu() { MATCHER_SCOPE(HSwishFusionWithoutRelu); // Replaces a sub-graph x * (min(max(x + 3, 0), 6) / 6) with a HSwish op. 
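All of the sub-graph shapes matched in this file compute the same scalar function, which is what makes replacing them with a single HSwish node safe. A small stand-alone check of that equivalence (plain C++, no nGraph):

// HSwish(x) = x * min(max(x + 3, 0), 6) / 6; the Relu- and Clamp-based forms
// matched above are algebraically identical up to floating-point rounding.
#include <algorithm>
#include <cassert>
#include <cmath>

static float hswish(float x) { return x * std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f; }

static float relu_min_div_form(float x) {
    const float r = std::max(x + 3.0f, 0.0f);                   // Relu(x + 3)
    return x * std::min(r, 6.0f) / 6.0f;                        // (x * min(Relu(x + 3), 6)) / 6
}

static float clamp_mul_form(float x) {
    const float c = std::min(std::max(x + 3.0f, 0.0f), 6.0f);   // Clamp(x + 3, 0, 6)
    return x * (c * (1.0f / 6.0f));                             // x * (Clamp(x + 3, 0, 6) * const(1/6))
}

int main() {
    for (float x = -8.0f; x <= 8.0f; x += 0.125f) {
        assert(std::fabs(relu_min_div_form(x) - hswish(x)) < 1e-5f);
        assert(std::fabs(clamp_mul_form(x) - hswish(x)) < 1e-5f);
    }
    return 0;
}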
auto input = ngraph::pattern::any_input(); - auto add_constant = ngraph::pattern::wrap_type(); - auto add = std::make_shared(input, add_constant); - auto max_constant = ngraph::pattern::wrap_type(); - auto max = std::make_shared(add, max_constant); - auto min_constant = ngraph::pattern::wrap_type(); - auto min = std::make_shared(max, min_constant); - auto div_constant = ngraph::pattern::wrap_type(); - auto div = std::make_shared(min, div_constant); - auto mul = std::make_shared(input, div); + auto add_constant = ngraph::pattern::wrap_type(); + auto add = std::make_shared(input, add_constant); + auto max_constant = ngraph::pattern::wrap_type(); + auto max = std::make_shared(add, max_constant); + auto min_constant = ngraph::pattern::wrap_type(); + auto min = std::make_shared(max, min_constant); + auto div_constant = ngraph::pattern::wrap_type(); + auto div = std::make_shared(min, div_constant); + auto mul = std::make_shared(input, div); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { auto &pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto max_const_value = std::dynamic_pointer_cast(pattern_to_output.at(max_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto max_const_value = std::dynamic_pointer_cast(pattern_to_output.at(max_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) && op::util::has_constant_value(max_const_value, 0.0f) @@ -152,7 +152,7 @@ ngraph::pass::HSwishFusionWithoutRelu::HSwishFusionWithoutRelu() { return false; } - auto hswish = std::make_shared(x_output); + auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), @@ -174,25 +174,25 @@ ngraph::pass::HSwishFusionWithoutRelu::HSwishFusionWithoutRelu() { register_matcher(m, callback); } -NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishFusionWithClamp, "HSwishFusionWithClamp", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishFusionWithClampMul, "HSwishFusionWithClampMul", 0); -ngraph::pass::HSwishFusionWithClamp::HSwishFusionWithClamp() { - MATCHER_SCOPE(HSwishFusionWithClamp); +ngraph::pass::HSwishFusionWithClampMul::HSwishFusionWithClampMul() { + MATCHER_SCOPE(HSwishFusionWithClampMul); // Replaces a sub-graph x * (Clamp(x + 3, 0, 6) * const(1/6)) with a HSwish op. 
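The ClampMul variant compares the multiplier constant against 1/6 with a tolerance (0.0001 in the callback below) because 1/6 is not exactly representable and models typically store a rounded value such as 0.16666667. The pass relies on op::util::has_constant_value for this; the hypothetical helper below only mirrors its intent.

// Hypothetical illustration of a tolerance-based constant check (the pass itself
// uses op::util::has_constant_value(constant, 1.0 / 6.0, 0.0001)).
#include <algorithm>
#include <cmath>
#include <memory>
#include <ngraph/opsets/opset7.hpp>

bool all_values_close(const std::shared_ptr<ngraph::opset7::Constant>& constant,
                      double expected, double tolerance) {
    if (!constant)
        return false;
    const auto values = constant->cast_vector<double>();
    return !values.empty() && std::all_of(values.begin(), values.end(), [&](double v) {
        return std::fabs(v - expected) <= tolerance;
    });
}
// Example: all_values_close(mul_const, 1.0 / 6.0, 1e-4) accepts 0.16666667f.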
auto input = ngraph::pattern::any_input(); - auto add_constant = ngraph::pattern::wrap_type(); - auto add = std::make_shared(input, add_constant); - auto clamp = std::make_shared(add, 0.0f, 6.0f); - auto mul_constant = ngraph::pattern::wrap_type(); - auto mul_first = std::make_shared(clamp, mul_constant); - auto mul_second = std::make_shared(input, mul_first); + auto add_constant = ngraph::pattern::wrap_type(); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.0f, 6.0f); + auto mul_constant = ngraph::pattern::wrap_type(); + auto mul_first = std::make_shared(clamp, mul_constant); + auto mul_second = std::make_shared(input, mul_first); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { auto &pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) && op::util::has_constant_value(mul_const_value, (1.0/6.0), 0.0001); @@ -201,7 +201,7 @@ ngraph::pass::HSwishFusionWithClamp::HSwishFusionWithClamp() { return false; } - auto hswish = std::make_shared(x_output); + auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), @@ -219,3 +219,71 @@ ngraph::pass::HSwishFusionWithClamp::HSwishFusionWithClamp() { auto m = std::make_shared(mul_second, matcher_name); register_matcher(m, callback); } + +NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishFusionWithClampDiv, "HSwishFusionWithClampDiv", 0); + +ngraph::pass::HSwishFusionWithClampDiv::HSwishFusionWithClampDiv() { + MATCHER_SCOPE(HSwishFusionWithClampDiv); + // Replaces a sub-graph x * (Clamp(x + 3, 0, 6) / 6) with a HSwish op. 
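Since Clamp(x + 3, 0, 6) / 6 is exactly HSigmoid(x), both the ClampDiv sub-graph and the x * HSigmoid(x) sub-graph handled by the new HSwishFusionWithHSigmoidMul matcher added below collapse into one HSwish node. A minimal before/after sketch for the latter case; the opset version and the umbrella pass name are assumptions:

// Sketch: x * HSigmoid(x) is expected to be rewritten into a single HSwish node.
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/hswish_fusion.hpp>

std::shared_ptr<ngraph::Function> build_and_fuse_hsigmoid_mul() {
    using namespace ngraph;
    auto x        = std::make_shared<opset7::Parameter>(element::f32, Shape{1, 3, 16, 16});
    auto hsigmoid = std::make_shared<opset7::HSigmoid>(x);
    auto mul      = std::make_shared<opset7::Multiply>(x, hsigmoid);
    auto f        = std::make_shared<Function>(NodeVector{mul}, ParameterVector{x});

    pass::Manager manager;
    manager.register_pass<pass::HSwishFusion>();
    manager.run_passes(f);
    // After the pass, the function body should consist of a single HSwish fed by x,
    // keeping the friendly name of the original Multiply.
    return f;
}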
+ auto input = ngraph::pattern::any_input(); + auto add_constant = ngraph::pattern::wrap_type(); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.0f, 6.0f); + auto div_constant = ngraph::pattern::wrap_type(); + auto div = std::make_shared(clamp, div_constant); + auto mul = std::make_shared(input, div); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + auto &pattern_to_output = m.get_pattern_value_map(); + auto x_output = pattern_to_output.at(input); + + auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) + && op::util::has_constant_value(div_const_value, 6.0); + if (!valid_constant_values) { + return false; + } + + auto hswish = std::make_shared(x_output); + + hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); + ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(clamp).get_node_shared_ptr(), + pattern_to_output.at(div).get_node_shared_ptr(), + pattern_to_output.at(mul).get_node_shared_ptr() + }, + hswish); + ngraph::replace_node(m.get_match_root(), hswish); + return true; + }; + + auto m = std::make_shared(mul, matcher_name); + register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishFusionWithHSigmoidMul, "HSwishFusionWithHSigmoidMul", 0); + +ngraph::pass::HSwishFusionWithHSigmoidMul::HSwishFusionWithHSigmoidMul() { + MATCHER_SCOPE(HSwishFusionWithHSigmoidMul); + // Replaces a sub-graph x * HSigmoid(x) with a HSwish op. + auto input = ngraph::pattern::any_input(); + auto hsigmoid_pattern = ngraph::pattern::wrap_type({input}, ngraph::pattern::consumers_count(1)); + auto mul_pattern = ngraph::pattern::wrap_type({input, hsigmoid_pattern}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_to_output = m.get_pattern_value_map(); + auto hsigmoid = pattern_to_output.at(hsigmoid_pattern).get_node_shared_ptr(); + auto mul = pattern_to_output.at(mul_pattern).get_node_shared_ptr(); + + auto hswish = std::make_shared(pattern_to_output.at(input)); + hswish->set_friendly_name(mul->get_friendly_name()); + ngraph::copy_runtime_info({hsigmoid, mul}, hswish); + ngraph::replace_node(mul, hswish); + return true; + }; + + auto m = std::make_shared(mul_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp b/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp index f7a54157e5f047..c2db73ea2ceef6 100644 --- a/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp +++ b/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -129,8 +129,10 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptroutput(0).get_tensor().set_name( op::util::create_ie_output_name(ti->output(concat_desc->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END // connect the Concat layer to the corresponding TI outputs for (auto &input : 
ti->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(concat); @@ -140,7 +142,9 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr result = body_functions[0]->get_results().at(concat_desc->m_body_value_index); const auto& input_to_res = result->get_input_source_output(0); // set output name to Tensor to store it for ngraph to cnn conversion + NGRAPH_SUPPRESS_DEPRECATED_START input_to_res.get_tensor().set_name(op::util::create_ie_output_name(ti->output(concat_desc->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END for (auto &input : ti->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(input_to_res); } @@ -153,7 +157,9 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptrinput_value(0); // set output name to Tensor to store it for ngraph to cnn conversion + NGRAPH_SUPPRESS_DEPRECATED_START in_value.get_tensor().set_name(op::util::create_ie_output_name(ti->output(output_desc->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END for (const auto &input : ti->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp index 8907487b8b66a9..a7efe7960b948b 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp @@ -19,7 +19,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchNormDecomposition, "BatchNormDecomposi ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { MATCHER_SCOPE(BatchNormDecomposition); - auto bn = pattern::wrap_type({ + auto bn = pattern::wrap_type({ pattern::any_input(pattern::has_static_rank()), pattern::any_input(pattern::has_static_shape()), pattern::any_input(pattern::has_static_shape()), @@ -28,20 +28,30 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { }); ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { - auto m_bn = dynamic_pointer_cast(m.get_match_root()); - if (!m_bn) { + auto m_bn = m.get_match_root(); + Output m_input, m_gamma, m_beta, m_mean, m_var; + double eps; + if (auto m_bn_v1 = dynamic_pointer_cast(m_bn)) { + m_gamma = m_bn_v1->input_value(0); + m_beta = m_bn_v1->input_value(1); + m_input = m_bn_v1->input_value(2); + m_mean = m_bn_v1->input_value(3); + m_var = m_bn_v1->input_value(4); + eps = m_bn_v1->get_eps_value(); + } else if (auto m_bn_v5 = dynamic_pointer_cast(m_bn)) { + m_input = m_bn_v5->input_value(0); + m_gamma = m_bn_v5->input_value(1); + m_beta = m_bn_v5->input_value(2); + m_mean = m_bn_v5->input_value(3); + m_var = m_bn_v5->input_value(4); + eps = m_bn_v5->get_eps_value(); + } else { return false; } - auto m_gamma = m_bn->input_value(0); - auto m_beta = m_bn->input_value(1); - auto m_input = m_bn->input_value(2); - auto m_mean = m_bn->input_value(3); - auto m_var = m_bn->input_value(4); - const auto& input_type = m_input.get_element_type(); // scale_add = variance + eps - auto scale_add = make_shared(m_var, opset5::Constant::create(input_type, Shape{}, {m_bn->get_eps_value()})); + auto scale_add = make_shared(m_var, opset5::Constant::create(input_type, Shape{}, {eps})); // scale = sqrt(variance + eps) auto scale = 
make_shared(scale_add); // Divide `gamma` by `sqrt(variance + eps)` @@ -79,67 +89,3 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { this->register_matcher(m, callback); } -NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchNormV5Decomposition, "BatchNormDecomposition", 5); - -// TODO: this pass will be unified with BatchNormDecomposition pass -ngraph::pass::BatchNormV5Decomposition::BatchNormV5Decomposition() { - MATCHER_SCOPE(BatchNormV5Decomposition); - auto bn = pattern::wrap_type({ - pattern::any_input(pattern::has_static_rank()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()) - }); - - ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { - auto m_bn = dynamic_pointer_cast(m.get_match_root()); - if (!m_bn) { - return false; - } - - auto m_input = m_bn->input_value(0); - auto m_gamma = m_bn->input_value(1); - auto m_beta = m_bn->input_value(2); - auto m_mean = m_bn->input_value(3); - auto m_var = m_bn->input_value(4); - - const auto& input_type = m_input.get_element_type(); - // scale_add = variance + eps - auto scale_add = make_shared(m_var, opset5::Constant::create(input_type, Shape{}, {m_bn->get_eps_value()})); - // scale = sqrt(variance + eps) - auto scale = make_shared(scale_add); - // Divide `gamma` by `sqrt(variance + eps)` - auto gamma_div_scale = std::make_shared(m_gamma, scale); - - int64_t dims_to_add = m_input.get_partial_shape().rank().get_length() - 2; - - // TODO: instead of getting full shape we can concatenate sequence of ones with ShapeOf - Shape input_aligned_shape = m_gamma.get_shape(); - for (int64_t i = 0; i < dims_to_add; ++i) - input_aligned_shape.push_back(1); - auto new_shape = opset5::Constant::create(element::i64, Shape{input_aligned_shape.size()}, input_aligned_shape); - - auto gamma_div_scale_aligned = make_shared(gamma_div_scale, new_shape, true); - auto beta_aligned = make_shared(m_beta, new_shape, true); - auto mean_aligned = make_shared(m_mean, new_shape, true); - - // input_sub_mean = input - mean - auto input_sub_mean = register_new_node(m_input, mean_aligned); - // Multiply `input - mean` and `gamma / sqrt(variance + eps)` - auto mul = std::make_shared(input_sub_mean, gamma_div_scale_aligned); - // Add `(input - mean) * gamma / sqrt(variance + eps)` and `beta` - auto add = std::make_shared(mul, beta_aligned); - - add->set_friendly_name(m_bn->get_friendly_name()); - - copy_runtime_info(m_bn, {scale_add, scale, gamma_div_scale, gamma_div_scale_aligned, - beta_aligned, input_sub_mean, mul, add}); - - replace_node(m_bn, add); - - return true; - }; - auto m = std::make_shared(bn, matcher_name); - this->register_matcher(m, callback); -} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_gelu.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_gelu.cpp index 2ee04f4f654c36..a3c683d1537a9d 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_gelu.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_gelu.cpp @@ -10,13 +10,13 @@ #include #include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertGELU, "ConvertGELU", 0); ngraph::pass::ConvertGELU::ConvertGELU() { MATCHER_SCOPE(ConvertGELU); - auto input = std::make_shared(element::f32, Shape{}); - auto gelu = std::make_shared(input); + auto gelu = 
pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto gelu = std::dynamic_pointer_cast(m.get_match_root()); diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp index 4b4d9f901fd661..ec80a640a5dae1 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp @@ -11,6 +11,7 @@ #include #include #include +#include using namespace ngraph; @@ -18,8 +19,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertShuffleChannels3, "ConvertShuffleCha ngraph::pass::ConvertShuffleChannels3::ConvertShuffleChannels3() { MATCHER_SCOPE(ConvertShuffleChannels3); - auto input = std::make_shared(element::f32, Shape{1, 1, 1, 1}); - auto shuffle_channels = std::make_shared<::opset3::ShuffleChannels>(input); + auto shuffle_channels = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { auto shuffle_channels = std::dynamic_pointer_cast<::opset3::ShuffleChannels>(m.get_match_root()); diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp index a9d18261b5c0d5..7080688b09c409 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -18,8 +18,12 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() { MATCHER_SCOPE(ConvertSubtract); auto sub = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto sub = std::dynamic_pointer_cast (m.get_match_root()); + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto sub = std::dynamic_pointer_cast(m.get_match_root()); if (!sub) { return false; } diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp index c30123803b8197..2a9c72b59cb990 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -24,8 +24,8 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToGRUSequence, "Conver ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSequence() { MATCHER_SCOPE(ConvertTensorIteratorToLSTMSequence); - auto tensor_iterator = std::make_shared(ngraph::element::f32, - ngraph::Shape{}, ngraph::pattern::has_class()); + auto tensor_iterator = pattern::wrap_type(); + ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { auto ti = std::dynamic_pointer_cast(m.get_match_root()); if (!ti || 
transformation_callback(ti)) @@ -180,7 +180,9 @@ ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSe for (const auto &input : ti->output(ordered_out_descs[i]->m_output_index).get_target_inputs()) { input.replace_source_output(outputs[i]->output(0)); } + NGRAPH_SUPPRESS_DEPRECATED_START outputs[i]->get_output_tensor(0).set_name(op::util::create_ie_output_name(ti->output(ordered_out_descs[i]->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END } } @@ -199,8 +201,8 @@ ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSe ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequence() { MATCHER_SCOPE(ConvertTensorIteratorToRNNSequence); - auto tensor_iterator = std::make_shared(ngraph::element::f32, - ngraph::Shape{}, ngraph::pattern::has_class()); + auto tensor_iterator = pattern::wrap_type(); + ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { auto ti = std::dynamic_pointer_cast(m.get_match_root()); if (!ti || transformation_callback(ti)) @@ -334,7 +336,9 @@ ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequ for (const auto &input : ti->output(ordered_out_descs[i]->m_output_index).get_target_inputs()) { input.replace_source_output(outputs[i]->output(0)); } + NGRAPH_SUPPRESS_DEPRECATED_START outputs[i]->get_output_tensor(0).set_name(op::util::create_ie_output_name(ti->output(ordered_out_descs[i]->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END } } @@ -353,8 +357,8 @@ ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequ ngraph::pass::ConvertTensorIteratorToGRUSequence::ConvertTensorIteratorToGRUSequence() { MATCHER_SCOPE(ConvertTensorIteratorToGRUSequence); - auto tensor_iterator = std::make_shared(ngraph::element::f32, - ngraph::Shape{}, ngraph::pattern::has_class()); + auto tensor_iterator = pattern::wrap_type(); + ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { auto ti = std::dynamic_pointer_cast(m.get_match_root()); if (!ti || transformation_callback(ti)) @@ -489,7 +493,9 @@ ngraph::pass::ConvertTensorIteratorToGRUSequence::ConvertTensorIteratorToGRUSequ for (const auto &input : ti->output(ordered_out_descs[i]->m_output_index).get_target_inputs()) { input.replace_source_output(outputs[i]->output(0)); } + NGRAPH_SUPPRESS_DEPRECATED_START outputs[i]->get_output_tensor(0).set_name(op::util::create_ie_output_name(ti->output(ordered_out_descs[i]->m_output_index))); + NGRAPH_SUPPRESS_DEPRECATED_END } } diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp new file mode 100644 index 00000000000000..ab4e91e1b324f7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "transformations/op_conversions/fq_decomposition.hpp" + +#include +#include +#include +#include +#include + +#include + +NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0); + +bool isValidRangesInputs(const std::shared_ptr &fq) { + auto il = fq->input_value(1); + auto ih = fq->input_value(2); + auto greater_equal = std::make_shared(il, ih); + + ngraph::OutputVector result(1); + if (!greater_equal->constant_fold(result, greater_equal->input_values())) + return false; + + auto 
res_node = std::dynamic_pointer_cast(result[0].get_node_shared_ptr()); + + const std::vector comp_result = res_node->cast_vector(); + + return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { return value; }); +} + +ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { + MATCHER_SCOPE(FakeQuantizeDecomposition); + auto data = ngraph::pattern::any_input(); + auto il = ngraph::pattern::wrap_type(); + auto ih = ngraph::pattern::wrap_type(); + auto ol = ngraph::pattern::wrap_type(); + auto oh = ngraph::pattern::wrap_type(); + auto fake_quantize = ngraph::pattern::wrap_type({data, il, ih, ol, oh}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + auto &pattern_to_output = m.get_pattern_value_map(); + const auto fake_quantize_node = std::dynamic_pointer_cast(pattern_to_output.at(fake_quantize).get_node_shared_ptr()); + + if (fake_quantize_node == nullptr || transformation_callback(fake_quantize_node) || !isValidRangesInputs(fake_quantize_node)) { + return false; + } + + Output data{fake_quantize_node->input_value(0)}; + const Output input_low{fake_quantize_node->input_value(1)}; + const Output input_high{fake_quantize_node->input_value(2)}; + const Output output_low{fake_quantize_node->input_value(3)}; + const Output output_high{fake_quantize_node->input_value(4)}; + auto input_type = data.get_element_type(); + + ngraph::NodeVector decomp_ops; + if (input_type != input_low.get_element_type()) { + input_type = input_low.get_element_type(); + data = std::make_shared(data, input_type); + decomp_ops.push_back(data.get_node_shared_ptr()); + } + + // if we set input_low or input_high in formula we got output = output_low and output = output_high respectively + // so we just clamp x + const auto max = std::make_shared(data, input_low); + const auto min = std::make_shared(max, input_high); + decomp_ops.push_back(max); + decomp_ops.push_back(min); + + // (levels-1) + const auto levels_minus_one = std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); + decomp_ops.push_back(levels_minus_one); + // (input_high - input_low) + const auto subInHighLow = std::make_shared(input_high, input_low); + // (levels-1) / (input_high - input_low) + const auto isc = std::make_shared(levels_minus_one, subInHighLow); + // input_low * (levels-1) / (input_high - input_low) + const auto ish = std::make_shared(input_low, isc); + decomp_ops.push_back(subInHighLow); + decomp_ops.push_back(isc); + decomp_ops.push_back(ish); + + // x * (levels-1) / (input_high - input_low) + const auto after_isc_apply = std::make_shared(min, isc); + // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low) + const auto after_ish_apply = std::make_shared(after_isc_apply, ish); + decomp_ops.push_back(after_isc_apply); + decomp_ops.push_back(after_ish_apply); + + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) + const auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + decomp_ops.push_back(round); + + // (output_high - output_low) + const auto sub_out_high_low = std::make_shared(output_high, output_low); + // (output_high - output_low) / (levels-1) + const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); + decomp_ops.push_back(sub_out_high_low); + decomp_ops.push_back(osc); + + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - 
output_low) / (levels-1) + const auto after_osc_apply = std::make_shared(round, osc); + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + + // output_low + std::shared_ptr result = std::make_shared(after_osc_apply, output_low); + decomp_ops.push_back(after_osc_apply); + decomp_ops.push_back(result); + + if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) { + result = std::make_shared(result, fake_quantize_node->get_output_element_type(0)); + decomp_ops.push_back(result); + } + + result->set_friendly_name(m.get_match_root()->get_friendly_name()); + ngraph::copy_runtime_info(fake_quantize_node, decomp_ops); + ngraph::replace_node(m.get_match_root(), result); + return true; + }; + + auto m = std::make_shared(fake_quantize, matcher_name); + register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp index 1f30d6662cf3a9..28e7d2c429d333 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp @@ -18,10 +18,8 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::LSTMCellDecomposition, "LSTMCellDecompositi ngraph::pass::LSTMCellDecomposition::LSTMCellDecomposition() { MATCHER_SCOPE(LSTMCellDecomposition); - auto is_supported_lstm_cell = [](const std::shared_ptr& n) { - return pattern::has_class()(n) || pattern::has_class()(n); - }; - auto any_lstm = std::make_shared(element::f32, Shape{}, is_supported_lstm_cell); + auto any_lstm = pattern::wrap_type(); + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { auto lstm_cell = std::dynamic_pointer_cast(m.get_match_root()); if (!lstm_cell || transformation_callback(lstm_cell)) { diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp index bc5b0974f7c8e1..2876dca7d4ae1f 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp @@ -17,7 +17,7 @@ ngraph::pass::MVN6Decomposition::MVN6Decomposition() { MATCHER_SCOPE(MVN6Decomposition); // Decomposes MVN(x, axes) op if normalize_variance is false into sub-graph // x - ReduceMean(x, axes), if normalize_variance is true into sub-graph - // (x - ReduceMean(x, axes)) / Sqrt(ReduceSum((x - ReduceMean(x, axes)) ^ 2)) + // (x - ReduceMean(x, axes)) / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) auto mvn = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { @@ -40,7 +40,7 @@ ngraph::pass::MVN6Decomposition::MVN6Decomposition() { ngraph::replace_node(mvn_node, mean_normalization); } else { auto mul = std::make_shared(mean_normalization, mean_normalization); - auto sum = std::make_shared(mul, axes, true); + auto mean2 = std::make_shared(mul, axes, true); auto eps = mvn_node->get_eps(); auto eps_node = ngraph::opset6::Constant::create(data.get_element_type(), ngraph::Shape{ 1 }, { eps }); @@ -51,19 +51,19 @@ ngraph::pass::MVN6Decomposition::MVN6Decomposition() { std::shared_ptr 
div; if (eps_mode == op::MVNEpsMode::INSIDE_SQRT) { - eps_add = std::make_shared(sum, eps_node); + eps_add = std::make_shared(mean2, eps_node); sqrt = std::make_shared(eps_add); div = std::make_shared(mean_normalization, sqrt); } else if (eps_mode == op::MVNEpsMode::OUTSIDE_SQRT) { - sqrt = std::make_shared(sum); + sqrt = std::make_shared(mean2); eps_add = std::make_shared(sqrt, eps_node); - div = std::make_shared(mean_normalization, sqrt); + div = std::make_shared(mean_normalization, eps_add); } else { return false; } div->set_friendly_name(mvn_node->get_friendly_name()); - ngraph::copy_runtime_info(mvn_node, { mean, mean_normalization, mul, sum, eps_node, eps_add, sqrt, div }); + ngraph::copy_runtime_info(mvn_node, { mean, mean_normalization, mul, mean2, eps_node, eps_add, sqrt, div }); ngraph::replace_node(mvn_node, div); } return true; diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 06a994469fc7bb..01469fa1731048 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -472,6 +472,17 @@ std::string get_output_precision_name(ngraph::Output& o) { } } +std::string escape_delim(const std::string& name, const char delim = ',') { + std::string result_name = name; + const std::string escaped_delim = std::string("\\") + delim; + size_t index = result_name.find(delim, 0); + while (index != std::string::npos) { + result_name.replace(index, 1, escaped_delim); + index = result_name.find(delim, index + 2); + } + return result_name; +} + std::string generate_unique_name( const std::unordered_set& unique_names, std::string base_name, int suffix) { @@ -506,10 +517,20 @@ bool is_exec_graph(const ngraph::Function& f) { return false; } +bool has_dynamic_output(std::shared_ptr n) { + for (size_t i = 0; i < n->get_output_size(); i++) { + if (n->get_output_partial_shape(i).is_dynamic()) { + return true; + } + } + return false; +} + bool resolve_dynamic_shapes(const ngraph::Function& f) { const auto & f_ops = f.get_ordered_ops(); if (std::all_of(f_ops.begin(), f_ops.end(), - [](std::shared_ptr results) { return !results->is_dynamic(); })) { + [](std::shared_ptr results) { + return !results->is_dynamic() && !has_dynamic_output(results); })) { return false; } @@ -662,6 +683,15 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") .set_value(get_output_precision_name(o).c_str()); + std::string names; + for (const auto& name : o.get_tensor().get_names()) { + if (!names.empty()) + names += ", "; + names += escape_delim(name); + } + if (!names.empty()) { + port.append_attribute("names").set_value(names.c_str()); + } for (auto d : o.get_shape()) { pugi::xml_node dim = port.append_child("dim"); dim.append_child(pugi::xml_node_type::node_pcdata) diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_loop.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_loop.hpp new file mode 100644 index 00000000000000..d6d0f24e06076d --- /dev/null +++ b/inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_loop.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ngraph { namespace vpu { namespace op { + +class StaticShapeLoop : public ngraph::opset6::Loop { 
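// StaticShapeLoop keeps opset6::Loop semantics but overrides validate_and_infer_types()
// to turn dynamic concat-output dimensions into static upper bounds, using
// ngraph::maximum_value() on the trip-count input as an upper-bound estimate of the
// iteration count (see the implementation further below).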
+public: + NGRAPH_RTTI_DECLARATION; + + explicit StaticShapeLoop(const Loop& loop); + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor&) override; +}; + +} // namespace op +} // namespace vpu +} // namespace ngraph diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_loop.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_loop.hpp new file mode 100644 index 00000000000000..d458909ecc088f --- /dev/null +++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_loop.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/node.hpp" + +#include + +namespace vpu { + +void validateLoop(const ngraph::Node& node); +void dynamicToStaticShapeLoop(std::shared_ptr node); + +} // namespace vpu diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_split.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_split.hpp index 54e6c573b4aa60..33e9905ff5f634 100644 --- a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_split.hpp +++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_split.hpp @@ -8,6 +8,7 @@ namespace vpu { +void validateSplit(const ngraph::Node& node); void dynamicToStaticShapeSplit(std::shared_ptr target); } // namespace vpu diff --git a/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_loop.cpp b/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_loop.cpp new file mode 100644 index 00000000000000..81914594040dc8 --- /dev/null +++ b/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_loop.cpp @@ -0,0 +1,53 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "vpu/ngraph/operations/static_shape_loop.hpp" + +namespace ngraph { namespace vpu { namespace op { + +NGRAPH_RTTI_DEFINITION(ngraph::vpu::op::StaticShapeLoop, "StaticShapeLoop", 0); + +StaticShapeLoop::StaticShapeLoop(const Loop& loop) : Loop(loop) {} + +void StaticShapeLoop::validate_and_infer_types() { + const auto isLoopStatic = [this]() { + const auto& outs = outputs(); + return !outs.empty() && std::all_of(outs.cbegin(), outs.cend(), [](const Output& output) { return output.get_partial_shape().is_static(); }); + }; + + if (isLoopStatic()) { + return; + } + + Loop::validate_and_infer_types(); + + const auto maxIterationsCountEstimation = ngraph::maximum_value(input_value(0).get_node_shared_ptr()); + NODE_VALIDATION_CHECK(this, maxIterationsCountEstimation.first, + "Encountered a loop for which upper-bound estimation for iterations count ", input_value(0), " failed"); + const auto& maxIterationsCount = maxIterationsCountEstimation.second; + NODE_VALIDATION_CHECK(this, maxIterationsCount > 0, "Encountered a loop with non-positive upper-bound estimation for iterations count ", + maxIterationsCountEstimation.second); + + const auto& body = get_function(); + for (const auto& outputDescription : get_output_descriptions()) { + if (const auto& concatOutputDescription = ngraph::as_type_ptr(outputDescription)) { + const auto& bodyOutput = body->output(concatOutputDescription->m_body_value_index); + const auto& axis = concatOutputDescription->m_axis; + auto partialShape = bodyOutput.get_partial_shape(); + 
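// The body output's dimension along the concatenation axis is scaled by the estimated
// maximum trip count, producing a static upper-bound shape for the concatenated Loop output.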
partialShape[axis] *= maxIterationsCount; + + const auto& concatOutput = output(concatOutputDescription->m_output_index); + set_output_type(concatOutputDescription->m_output_index, concatOutput.get_element_type(), partialShape); + } + } +} + +bool StaticShapeLoop::visit_attributes(AttributeVisitor& visitor) { + return Loop::visit_attributes(visitor); +} + +} // namespace op +} // namespace vpu +} // namespace ngraph diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp index 4189c79ee676fa..ba95758f036e25 100644 --- a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp @@ -24,6 +24,7 @@ #include "vpu/ngraph/transformations/dynamic_to_static_shape_unary_elementwise.hpp" #include "vpu/ngraph/transformations/dynamic_to_static_shape_unsqueeze.hpp" #include "vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp" +#include "vpu/ngraph/transformations/dynamic_to_static_shape_loop.hpp" #include "vpu/ngraph/utilities.hpp" #include "vpu/utils/error.hpp" @@ -72,20 +73,23 @@ bool propagateUpperBoundFromExistingDSR(std::shared_ptr& funct return function_changed; } +using Validators = std::unordered_map>; +const Validators& getValidators() { + static const Validators validators = { + {ngraph::opset5::Split::type_info, validateSplit}, + {ngraph::opset5::VariadicSplit::type_info, validateSplit}, + {ngraph::opset6::Loop::type_info, validateLoop}, + }; + return validators; +} + void validateDynamicFunction(const ngraph::Function& function) { - for (auto const& split : function.get_ordered_ops()) { - if (split->get_type_info() != ngraph::opset5::Split::type_info && split->get_type_info() != ngraph::opset5::VariadicSplit::type_info) { + const auto& validators = getValidators(); + for (const auto& node : function.get_ordered_ops()) { + if (!validators.count(node->get_type_info())) { continue; } - - VPU_THROW_UNLESS(split->get_input_size() >= 2, "There is Split operation \"{}\" without specified axis", split->get_friendly_name()); - const auto& axis = ngraph::as_type_ptr(split->input_value(1).get_node_shared_ptr()); - VPU_THROW_UNLESS(axis != nullptr, "There is Split operation \"{}\" with dynamic axis \"{}\", but only constant axis is supported", - split->get_friendly_name(), split->input_value(1).get_node_shared_ptr()->get_friendly_name()); - const auto axisValue = ngraph::normalize_axis(split.get(), axis->cast_vector().front(), split->get_input_partial_shape(0).rank()); - VPU_THROW_UNLESS(split->get_input_partial_shape(0)[axisValue].is_static(), - "There is Split operation \"{}\" by dynamic dimension, but only split by static dimension is supported: shape = \"{}\", axis = \"{}\"", - split->get_friendly_name(), split->get_input_partial_shape(0), axisValue); + validators.at(node->get_type_info())(*node); } } @@ -142,6 +146,8 @@ const Transformations& getDefaultTransformations() { {ngraph::opset3::ReduceMin::type_info, dynamicToStaticShapeReduce}, {ngraph::opset3::ReduceProd::type_info, dynamicToStaticShapeReduce}, {ngraph::opset3::ReduceSum::type_info, dynamicToStaticShapeReduce}, + + {ngraph::opset6::Loop::type_info, dynamicToStaticShapeLoop}, }; return transformations; } diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_loop.cpp 
b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_loop.cpp new file mode 100644 index 00000000000000..94f6a15082c286 --- /dev/null +++ b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_loop.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/opsets/opset6.hpp" +#include "vpu/ngraph/transformations/dynamic_to_static_shape_loop.hpp" +#include +#include +#include +#include + +template +bool hasDynamic(const std::vector& dataObjects) { + return std::any_of(dataObjects.cbegin(), dataObjects.cend(), [](const DataObject& data) { return data.get_partial_shape().is_dynamic(); }); +} + +namespace vpu { + +void validateLoop(const ngraph::Node& node) { + const auto& loop = dynamic_cast(node); + VPU_THROW_UNLESS(loop.get_input_size() >= 3, "Encountered operation {} with {} inputs, expected at least {} inputs", loop, loop.get_input_size(), 3); + + const auto& executionCondition = ngraph::as_type_ptr(loop.input_value(1).get_node_shared_ptr()); + VPU_THROW_UNLESS(executionCondition != nullptr, "Execution condition of a loop {} is expected to be constant true, got {}", loop, executionCondition); + const auto& executionConditionValue = executionCondition->get_vector(); + VPU_THROW_UNLESS(executionConditionValue == std::vector{true}, + "Execution condition of a loop {} is expected to be constant true, got {}", loop, executionCondition); + + for (const auto& inputDescription : loop.get_input_descriptions()) { + if (const auto& sliceInputDescription = ngraph::as_type_ptr(inputDescription)) { + const auto& sliceInput = loop.input_value(sliceInputDescription->m_input_index); + const auto& partialShape = sliceInput.get_partial_shape(); + + VPU_THROW_UNLESS(partialShape.rank().is_static(), "Slice input {} of a loop {} is expected to have static rank, got dynamic", sliceInput, loop); + const auto& rank = partialShape.rank().get_length(); + for (std::size_t dimension = 1; dimension < rank; ++dimension) { + VPU_THROW_UNLESS(partialShape[dimension].is_static(), + "Slice input {} of a loop {} is expected to have only batch as dynamic dimension, got {}", sliceInput, loop, partialShape); + } + } else if (const auto& invariantInputDescription = ngraph::as_type_ptr(inputDescription)) { + const auto& invariantInput = loop.input_value(invariantInputDescription->m_input_index); + const auto& partialShape = invariantInput.get_partial_shape(); + VPU_THROW_UNLESS(partialShape.is_static(), + "Invariant input {} of a loop {} is expected to have static shape, got {}", invariantInput, loop, partialShape); + } else { + VPU_THROW_FORMAT("Encountered unknown input type of a loop {} at index {}", loop, inputDescription->m_input_index); + } + } + + const auto& body = loop.get_function(); + for (const auto& operation : body->get_ordered_ops()) { + VPU_THROW_UNLESS(!hasDynamic(operation->inputs()) && !hasDynamic(operation->outputs()), + "Encountered a loop {} with dynamic operation {} in the body, but only static body loops are supported", loop, operation); + } + + for (const auto& outputDescription : loop.get_output_descriptions()) { + if (const auto& concatOutputDescription = ngraph::as_type_ptr(outputDescription)) { + const auto& concatOutput = loop.output(concatOutputDescription->m_output_index); + const auto& partialShape = concatOutput.get_partial_shape(); + + VPU_THROW_UNLESS(partialShape.rank().is_static(), "Concat output {} of a loop {} is expected to have static rank, got dynamic", 
concatOutput, loop); + const auto& rank = partialShape.rank().get_length(); + for (std::size_t dimension = 1; dimension < rank; ++dimension) { + VPU_THROW_UNLESS(partialShape[dimension].is_static(), + "Concat output {} of a loop {} is expected to have only batch as dynamic dimension, got {}", concatOutput, loop, partialShape); + } + } else if (const auto& bodyOutputDescription = ngraph::as_type_ptr(outputDescription)) { + const auto& bodyOutput = loop.output(bodyOutputDescription->m_output_index); + const auto& partialShape = bodyOutput.get_partial_shape(); + VPU_THROW_UNLESS(partialShape.is_static(), + "Body output {} of a loop {} is expected to have static shape, got {}", bodyOutput, loop, partialShape); + } else { + VPU_THROW_FORMAT("Encountered unknown output type of a loop {} at index {}", loop, outputDescription->m_output_index); + } + } +} + +void dynamicToStaticShapeLoop(std::shared_ptr node) { + const auto& loop = ngraph::as_type_ptr(node); + VPU_THROW_UNLESS(loop != nullptr, "Encountered node {}, but expected loop", node); + + const auto copied = ngraph::as_type_ptr(loop->clone_with_new_inputs(loop->input_values())); + const auto& staticShapeLoop = std::make_shared(*copied); + staticShapeLoop->validate_and_infer_types(); + const auto& iterationsCount = staticShapeLoop->input_value(0); + const auto& body = staticShapeLoop->get_function(); + for (const auto& outputDescription : loop->get_output_descriptions()) { + const auto& index = outputDescription->m_output_index; + auto replacement = staticShapeLoop->output(index).get_node_shared_ptr(); + if (const auto& concatOutputDescription = ngraph::as_type_ptr(outputDescription)) { + const auto& bodyOutput = body->get_results().at(concatOutputDescription->m_body_value_index)->input_value(0); + + VPU_THROW_UNLESS(bodyOutput.get_partial_shape().is_static(), + "Encountered loop {} with dynamic body output {}, but only static body is supported", loop, bodyOutput); + auto shape = bodyOutput.get_shape(); + const auto& axis = concatOutputDescription->m_axis; + + const auto outputShape = std::make_shared( + std::make_shared(ngraph::element::i64, ngraph::Shape{shape.size()}, shape), + std::make_shared(ngraph::element::i64, ngraph::Shape{1}, axis), + iterationsCount, + std::make_shared(ngraph::element::i64, ngraph::Shape{}, 0)); + + replacement = std::make_shared(replacement, outputShape); + } + + replacement->set_friendly_name(loop->get_friendly_name() + "." 
+ std::to_string(index)); + loop->output(index).replace(replacement); + } +} + +} // namespace vpu diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_split.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_split.cpp index a74dc251b5b705..0b06b9033cdd6e 100644 --- a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_split.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_split.cpp @@ -14,6 +14,17 @@ namespace vpu { +void validateSplit(const ngraph::Node& split) { + VPU_THROW_UNLESS(split.get_input_size() >= 2, "There is Split operation \"{}\" without specified axis", split.get_friendly_name()); + const auto& axis = ngraph::as_type_ptr(split.input_value(1).get_node_shared_ptr()); + VPU_THROW_UNLESS(axis != nullptr, "There is Split operation \"{}\" with dynamic axis \"{}\", but only constant axis is supported", + split.get_friendly_name(), split.input_value(1).get_node_shared_ptr()->get_friendly_name()); + const auto axisValue = ngraph::normalize_axis(split.description(), axis->cast_vector().front(), split.get_input_partial_shape(0).rank()); + VPU_THROW_UNLESS(split.get_input_partial_shape(0)[axisValue].is_static(), + "There is Split operation \"{}\" by dynamic dimension, but only split by static dimension is supported: shape = \"{}\", axis = \"{}\"", + split.get_friendly_name(), split.get_input_partial_shape(0), axisValue); +} + void dynamicToStaticShapeSplit(std::shared_ptr target) { const auto split = ngraph::as_type_ptr(target); VPU_THROW_UNLESS(split, diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp index d55fe5629d42b0..5e876c3f80be3f 100644 --- a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp @@ -342,6 +342,7 @@ std::shared_ptr makeLoop(ngraph::Node* root, ngraph::Node* results.emplace_back(std::make_shared(iterationCondition)); auto body = std::make_shared(results, parameters, "body"); loop->set_function(body); + loop->set_special_body_ports({-1, static_cast(results.size()) - 1}); for (const auto& entry : slicedInputs) { loop->set_sliced_input(entry.first, entry.second, 0, 1, 1, -1, 0); } @@ -358,7 +359,6 @@ std::shared_ptr makeLoop(ngraph::Node* root, ngraph::Node* loop->get_concatenated_slices(entry, 0, 1, 1, -1, 0); } - loop->set_special_body_ports({-1, static_cast(results.size()) - 1}); loop->validate_and_infer_types(); return loop; } @@ -379,6 +379,30 @@ bool updateExternals(const ngraph::Node* source, const Nodes& allForward, const return updated; } +template +bool removeExternalConnections(ngraph::Node* source, SubGraph& topSubGraph, SubGraph& bottomSubGraph, Nodes& topExternals, Nodes& bottomExternals, + NextTop&& getNextTop, NextBottom&& getNextBottom) { + bool hasBeenUpdated = false; + + bool hasNewTopExternals = false; + bool hasNewBottomExternals = false; + do { + hasNewTopExternals = updateExternals(source, topSubGraph.all, bottomSubGraph.all, topExternals, getNextBottom); + if (hasNewTopExternals) { + topSubGraph = getLeaves(source, topExternals, getNextTop); + hasBeenUpdated = true; + } + + hasNewBottomExternals = updateExternals(source, bottomSubGraph.all, 
topSubGraph.all, bottomExternals, getNextTop); + if (hasNewBottomExternals) { + bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom); + hasBeenUpdated = true; + } + } while (hasNewTopExternals || hasNewBottomExternals); + + return hasBeenUpdated; +} + } // namespace bool ExtractBatch::run_on_function(std::shared_ptr functionPointer) { @@ -434,32 +458,14 @@ bool ExtractBatch::run_on_function(std::shared_ptr functionPoi auto topSubGraph = getLeaves(source, topExternals, getNextTop); auto bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom); - auto hasNewTopExternals = updateExternals(source, topSubGraph.all, bottomSubGraph.all, topExternals, getNextBottom); - if (hasNewTopExternals) { - topSubGraph = getLeaves(source, topExternals, getNextTop); - } - - bool hasNewBottomExternals = updateExternals(source, bottomSubGraph.all, topSubGraph.all, bottomExternals, getNextTop); - if (hasNewBottomExternals) { - bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom); - } + removeExternalConnections(source, topSubGraph, bottomSubGraph, topExternals, bottomExternals, getNextTop, getNextBottom); ngraph::Node* top = nullptr; ngraph::Node* bottom = nullptr; do { getLeavesLCA(source, top, topSubGraph.all, topSubGraph.leaves, bottomSubGraph.all, getNextTop, getNextBottom); getLeavesLCA(source, bottom, bottomSubGraph.all, bottomSubGraph.leaves, topSubGraph.all, getNextBottom, getNextTop); - - hasNewTopExternals = updateExternals(source, topSubGraph.all, bottomSubGraph.all, topExternals, getNextBottom); - if (hasNewTopExternals) { - topSubGraph = getLeaves(source, topExternals, getNextTop); - } - - hasNewBottomExternals = updateExternals(source, bottomSubGraph.all, topSubGraph.all, bottomExternals, getNextTop); - if (hasNewBottomExternals) { - bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom); - } - } while (hasNewTopExternals || hasNewBottomExternals); + } while (removeExternalConnections(source, topSubGraph, bottomSubGraph, topExternals, bottomExternals, getNextTop, getNextBottom)); for (const auto& node : topSubGraph.all) { if (sources.count(node)) { diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/stages/iteration_rule.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/stages/iteration_rule.hpp index 21d130043422c6..8ea01dcd241064 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/stages/iteration_rule.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/stages/iteration_rule.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -13,6 +13,8 @@ namespace vpu { +constexpr auto g_dynamicIterationCount = -1; + class IterationRule { public: IterationRule(Dim new_axis, int32_t new_start, int32_t new_stride, int32_t new_end) diff --git a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_kernel.cpp b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_kernel.cpp index 55fdf990ff38ac..3bc0082f76b673 100644 --- a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_kernel.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_kernel.cpp @@ -196,7 +196,7 @@ CustomKernel::CustomKernel(const pugi::xml_node& kernel, std::string configDir): _maxShaves = XMLParseUtils::GetIntAttr(kernel, "max-shaves", 0); std::string fileName; - for (auto source = kernel.child("Source"); !source.empty(); source = source.next_sibling("Source")) { + FOREACH_CHILD(source, 
kernel, "Source") { fileName = _configDir + "/" + XMLParseUtils::GetStrAttr(source, "filename", ""); std::ifstream inputFile(fileName, std::ios::binary); @@ -307,7 +307,7 @@ void CustomKernel::processParametersNode(const pugi::xml_node& node) { const auto cmp = ie::details::CaselessEq {}; const auto parameters = node.child("Parameters"); - for (auto tensor = parameters.child("Tensor"); !tensor.empty(); tensor = tensor.next_sibling("Tensor")) { + FOREACH_CHILD(tensor, parameters, "Tensor") { KernelParam kp; auto typeStr = XMLParseUtils::GetStrAttr(tensor, "type"); @@ -340,7 +340,7 @@ void CustomKernel::processParametersNode(const pugi::xml_node& node) { _kernelParams.push_back(std::move(kp)); } - for (auto data = parameters.child("Data"); !data.empty(); data = data.next_sibling("Data")) { + FOREACH_CHILD(data, parameters, "Data") { KernelParam kp; auto typeStr = XMLParseUtils::GetStrAttr(data, "type"); @@ -377,7 +377,7 @@ void CustomKernel::processParametersNode(const pugi::xml_node& node) { _kernelParams.push_back(std::move(kp)); } - for (auto scalar = parameters.child("Scalar"); !scalar.empty(); scalar = scalar.next_sibling("Scalar")) { + FOREACH_CHILD(scalar, parameters, "Scalar") { KernelParam kp; const auto type = XMLParseUtils::GetStrAttr(scalar, "type"); diff --git a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp index 9c6cd3254c3e29..82d237b1cdb365 100644 --- a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp @@ -160,7 +160,7 @@ CustomLayer::CustomLayer(std::string configDir, const pugi::xml_node& customLaye assertOneOrMoreOccurrence(customLayer, {"Kernel"}); auto kernelNodes = [&] { auto nodes = SmallVector{}; - for (auto kernel = customLayer.child("Kernel"); !kernel.empty(); kernel = kernel.next_sibling("Kernel")) { + FOREACH_CHILD(kernel, customLayer, "Kernel") { assertExactlyOneOccurrence(kernel, {"Parameters", "WorkSizes"}); assertOneOrMoreOccurrence(kernel, {"Source"}); nodes.push_back(kernel); diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/loop_end.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/loop_end.cpp index f78b216b210d5f..0f3209496b46c0 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/loop_end.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/loop_end.cpp @@ -1,12 +1,10 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "vpu/frontend/frontend.hpp" #include "vpu/stages/iteration_rule.hpp" -#include -#include #include #include @@ -14,8 +12,6 @@ namespace vpu { namespace { -const int32_t dynamicIterationNum = -1; - class LoopEnd : public StageNode { public: using StageNode::StageNode; @@ -56,18 +52,19 @@ class LoopEnd : public StageNode { } void serializeParamsImpl(BlobSerializer& serializer) const override { - int32_t iterations_count = attrs().has("batchId") ? 
dynamicIterationNum : attrs().get("iterations-count"); - serializer.append(iterations_count); + const auto iterationsCount = static_cast(attrs().getOrDefault("iterations-count", g_dynamicIterationCount)); + serializer.append(iterationsCount); + + const auto& endCopies = attrs().getOrDefault("end-iteration-components", {}); + serializer.append(checked_cast(endCopies.size())); if (attrs().has("batchId")) { const auto batchId = attrs().get("batchId"); - const auto numDims = inputEdge(batchId)->input()->desc().numDims(); + const auto numDims = outputEdge(batchId)->output()->desc().numDims(); const auto batchDimInd = numDims - 1 - dimToIeInd(Dim::N, numDims); serializer.append(static_cast(batchDimInd)); } - const auto& endCopies = attrs().getOrDefault("end-iteration-components", {}); - serializer.append(checked_cast(endCopies.size())); for (const auto& component : endCopies) { const auto& rule = component.first.second; auto axis = rule.axis; @@ -85,7 +82,7 @@ class LoopEnd : public StageNode { if (attrs().has("batchId")) { auto batchId = attrs().get("batchId"); - inputEdge(batchId)->input()->serializeBuffer(serializer); + outputEdge(batchId)->output()->serializeBuffer(serializer); } for (const auto& iteration : endCopies) { diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/loop_start.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/loop_start.cpp index baaee3f858e0d7..d9031f7fe7a343 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/loop_start.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/loop_start.cpp @@ -1,12 +1,10 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "vpu/frontend/frontend.hpp" #include "vpu/stages/iteration_rule.hpp" -#include -#include #include #include @@ -14,8 +12,6 @@ namespace vpu { namespace { -const int32_t dynamicIterationNum = -1; - class LoopStart : public StageNode { public: using StageNode::StageNode; @@ -44,10 +40,13 @@ class LoopStart : public StageNode { } void serializeParamsImpl(BlobSerializer& serializer) const override { - int32_t iterations_count = attrs().has("batchId") ? 
dynamicIterationNum : attrs().get("iterations-count"); - serializer.append(iterations_count); + const auto iterationsCount = static_cast(attrs().getOrDefault("iterations-count", g_dynamicIterationCount)); + serializer.append(iterationsCount); serializer.append(attrs().get("stages-count")); + const auto& startCopies = attrs().getOrDefault("start-iteration-components", {}); + serializer.append(checked_cast(startCopies.size())); + if (attrs().has("batchId")) { const auto batchId = attrs().get("batchId"); const auto numDims = inputEdge(batchId)->input()->desc().numDims(); @@ -55,8 +54,6 @@ class LoopStart : public StageNode { serializer.append(static_cast(batchDimInd)); } - const auto& startCopies = attrs().getOrDefault("start-iteration-components", {}); - serializer.append(checked_cast(startCopies.size())); for (const auto& component : startCopies) { const auto& rule = component.first.second; auto axis = rule.axis; diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/tensor_iterator.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/tensor_iterator.cpp index c242620fd45758..7a11285ac435a7 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/tensor_iterator.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/tensor_iterator.cpp @@ -1,20 +1,17 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "vpu/frontend/frontend.hpp" #include "vpu/stages/iteration_rule.hpp" #include "vpu/utils/auto_scope.hpp" -#include "vpu/compile_env.hpp" #include #include "vpu/model/data_contents/ie_blob_content.hpp" #include #include -#include #include -#include #include #include #include @@ -26,6 +23,11 @@ namespace { using PortMap = ie::TensorIterator::PortMap; +constexpr auto s_curIterPort = "loop_body_current_iteration_idx"; +constexpr auto s_tripCountPort = "loop_trip_count_idx"; +constexpr auto s_initCondPort = "loop_execution_condition_idx"; +constexpr auto s_condPort = "loop_body_condition_output_idx"; + bool isIterable(const PortMap& rule) { return rule.axis != -1; } @@ -184,9 +186,9 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la const auto& bodyInputs = tensorIterator->body.inputs; VPU_THROW_UNLESS(!bodyInputs.empty(), "If there is no an input for Tensor Iterator's body, so there is no iteration in tensor iterator"); - for (auto iterator = bodyInputs.begin(); iterator != bodyInputs.end(); ++iterator) { - const auto& bodyInput = *iterator; - const bool isLast = iterator == std::prev(bodyInputs.end()); + for (std::size_t bodyInputPort = 0; bodyInputPort < bodyInputs.size(); ++bodyInputPort) { + const auto& bodyInput = bodyInputs[bodyInputPort]; + const bool isLast = bodyInputPort == (bodyInputs.size() - 1); VPU_THROW_UNLESS(!isFakeHolder(bodyInput) || isLast , "There can be only one fake holder and it can be only the last Tensor Iterator body input"); if (isFakeHolder(bodyInput)) { // fake holder keeps strong references on const data objects that are not presented in Tensor Iterator's body input vector @@ -195,11 +197,11 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la } VPU_THROW_UNLESS(!(isIterable(bodyInput, tensorIterator) && hasBackEdgeConnectionTo(bodyInput, tensorIterator)), - "There must not be a back-edge connection to iterable component"); + "There must not be a back-edge connection to iterable component"); const auto& tensorIteratorInputs = findTIInputsDataByBodyData(bodyInput); 
VPU_THROW_UNLESS(tensorIteratorInputs.size() == 1, - "There must be exactly one Tensor Iterator's input data object for each body's input data object except fake holder"); + "There must be exactly one Tensor Iterator's input data object for each body's input data object except fake holder"); const auto& tensorIteratorInput = tensorIteratorInputs.front(); if (isIterable(bodyInput, tensorIterator)) { @@ -210,8 +212,8 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la } else if (hasBackEdgeConnectionTo(bodyInput, tensorIterator)) { const auto& bodyOutputs = getBodyOutputsByBodyInput(bodyInput); VPU_THROW_UNLESS(bodyOutputs.size() == 1, - "There must be exactly one Tensor Iterator's body output data object for each back-edge connection " - "with the same Tensor Iterator's body input data object"); + "There must be exactly one Tensor Iterator's body output data object for each back-edge connection " + "with the same Tensor Iterator's body input data object"); const auto& bodyOutput = bodyOutputs.front(); backedges[std::make_pair(bodyOutput, tensorIteratorInput)].push_back(bodyInput); @@ -257,6 +259,30 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la } } + vpu::Optional batchIdx{}; + if (tensorIterator->params.count(s_tripCountPort)) { + VPU_THROW_UNLESS(!iterations.empty(), + "Encountered Loop which is supposed to be loop by dynamic batch (dynamic iterations count), but didn't find an iteration component"); + VPU_THROW_UNLESS(!tensorIterator->params.count(s_curIterPort), "Current iteration port for body of Loop operation is not supported"); + batchIdx = static_cast(loopStartInputs.size()); + } + + if (tensorIterator->params.count(s_initCondPort)) { + const auto& input = tensorIterator->insData[tensorIterator->GetParamAsUInt(s_initCondPort)].lock(); + VPU_THROW_UNLESS(isConst(input), "Execution condition for Loop must be constant true"); + + const auto& creator = getCreatorLayer(input).lock(); + VPU_THROW_UNLESS(creator->blobs.size() == 1, "Execution condition for Loop must contain exactly one blob, got {}", creator->blobs.size()); + + const auto& blob = creator->blobs.begin()->second; + VPU_THROW_UNLESS(blob->size() == 1, "Execution condition for Loop must be single value, got {} values", blob->size()); + VPU_THROW_UNLESS(blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::I32, + "Execution condition for Loop must have I32 type, got {}", blob->getTensorDesc().getPrecision()); + + const auto value = blob->buffer().as()[0]; + VPU_THROW_UNLESS(value == 1, "Execution condition for Loop must be true, got {} as value", value); + } + IterationComponents start_iteration_components; for (const auto& iteration : iterations) { const auto& tensorIteratorInput = iteration.first.first; @@ -313,6 +339,10 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la auto loopStart = _stageBuilder->addLoopStartStage(model, tensorIterator->name + "@LoopStart", loopStartInputs, loopStartOutputs); loopStart->attrs().set("start-iteration-components", start_iteration_components); + if (batchIdx.hasValue()) { + loopStart->attrs().set("batchId", batchIdx.get()); + } + for (const auto& backedge : backedges) { const auto& parent = getVpuData(backedge.first.first); VPU_THROW_UNLESS(parent != nullptr, "Loop End's inputs must be already parsed"); @@ -344,7 +374,27 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la const auto& bodyOutputs = tensorIterator->body.outputs; 
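For orientation, the validation introduced above expects Loop operations whose execution condition (and body condition) are constant true, with the body condition exposed through the special body ports. Below is a minimal, purely illustrative sketch of such a Loop built with the ngraph opset5 API; the names, shapes, and trip count of 10 are made up, and only calls already used in makeLoop above (set_function, set_special_body_ports, set_sliced_input, get_concatenated_slices, validate_and_infer_types) appear.

    #include <memory>
    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset5.hpp>

    std::shared_ptr<ngraph::Function> makeStaticLoopExample() {
        using namespace ngraph;

        // Outer input: 10 slices of shape {1, 4} along axis 0.
        auto data = std::make_shared<opset5::Parameter>(element::f32, Shape{10, 4});

        // Loop body: one sliced input, a computation, and a constant-true body condition.
        auto bodyParam = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 4});
        auto bodyRelu = std::make_shared<opset5::Relu>(bodyParam);
        auto bodyCondition = opset5::Constant::create(element::boolean, Shape{}, {true});
        ResultVector bodyResults{std::make_shared<opset5::Result>(bodyRelu),
                                 std::make_shared<opset5::Result>(bodyCondition)};
        auto body = std::make_shared<Function>(bodyResults, ParameterVector{bodyParam}, "body");

        // Static trip count and constant-true execution condition, as the checks above require.
        auto tripCount = opset5::Constant::create(element::i64, Shape{}, {10});
        auto execCondition = opset5::Constant::create(element::boolean, Shape{}, {true});

        auto loop = std::make_shared<opset5::Loop>(tripCount, execCondition);
        loop->set_function(body);
        // No "current iteration" body input (-1); body condition is body output #1.
        loop->set_special_body_ports({-1, 1});
        loop->set_sliced_input(bodyParam, data, 0, 1, 1, -1, 0);
        auto out = loop->get_concatenated_slices(bodyRelu, 0, 1, 1, -1, 0);
        loop->validate_and_infer_types();

        return std::make_shared<Function>(OutputVector{out}, ParameterVector{data}, "loop_example");
    }

As in the extract_dynamic_batch.cpp hunk above, set_special_body_ports is called before the sliced inputs and concatenated slices are registered, so the body condition output is known when the port maps are validated.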
VPU_THROW_UNLESS(!bodyOutputs.empty(), "If there is no an output for Tensor Iterator's body, so there is no iteration in tensor iterator"); - for (const auto& bodyOutput : bodyOutputs) { + for (std::size_t bodyOutputIdx = 0; bodyOutputIdx < bodyOutputs.size(); ++bodyOutputIdx) { + const auto& bodyOutput = bodyOutputs[bodyOutputIdx]; + + if (tensorIterator->params.count(s_condPort) && tensorIterator->GetParamAsUInt(s_condPort) == bodyOutputIdx) { + const auto& creator = getCreatorLayer(bodyOutput).lock(); + if (!creator) { + // ConstTransformer leaves constant without creator + // Assume it's true + continue; + } + VPU_THROW_UNLESS(isConst(bodyOutput), "Body execution condition must be constant true"); + + VPU_THROW_UNLESS(creator->blobs.size() == 1, "Body execution condition constant must have one blob"); + const auto& blob = creator->blobs.begin()->second; + VPU_THROW_UNLESS(blob->size() == 1, "Body execution condition must be single value"); + VPU_THROW_UNLESS(blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::I32, "Body execution condition must be I32"); + const auto value = blob->buffer().as()[0]; + VPU_THROW_UNLESS(value == 1, "Body execution condition must be true"); + continue; + } + VPU_THROW_UNLESS(!isFakeHolder(bodyOutput), "Fake holder can be only in body's input"); const auto& tensorIteratorOutputs = findTIOutputsDataByBodyData(bodyOutput); @@ -380,6 +430,14 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la auto loopEndOutputs = DataVector{}; + vpu::Optional batchIdx{}; + if (tensorIterator->params.count(s_tripCountPort)) { + VPU_THROW_UNLESS(!iterations.empty(), + "Encountered Loop which is supposed to be loop by dynamic batch (dynamic iterations count), but didn't find an iteration component"); + VPU_THROW_UNLESS(!tensorIterator->params.count(s_curIterPort), "Current iteration port for body of Loop operation is not supported"); + batchIdx = static_cast(loopEndOutputs.size()); + } + IterationComponents end_iteration_components; for (const auto& iteration : iterations) { const auto& tensorIteratorOutput = iteration.first.first; @@ -428,6 +486,10 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la auto loopEnd = _stageBuilder->addLoopEndStage(model, tensorIterator->name + "@LoopEnd", loopEndInputs, loopEndOutputs); loopEnd->attrs().set("end-iteration-components", end_iteration_components); + if (batchIdx.hasValue()) { + loopEnd->attrs().set("batchId", batchIdx.get()); + } + return loopEnd; }; @@ -435,8 +497,13 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la auto loopEnd = introduceLoopEnd(); auto loopStart = introduceLoopStart(); - loopStart->attrs().set("iterations-count", getNumIteration(*tensorIterator)); - loopEnd->attrs().set("iterations-count", getNumIteration(*tensorIterator)); + if (!tensorIterator->params.count(s_tripCountPort)) { + const auto iterationsCount = getNumIteration(*tensorIterator); + VPU_THROW_UNLESS(iterationsCount >= 0, "Encountered Tensor Iterator with iterations count equal to {}, but only non-negative values are supported", + iterationsCount); + loopStart->attrs().set("iterations-count", static_cast(iterationsCount)); + loopEnd->attrs().set("iterations-count", static_cast(iterationsCount)); + } // to allocate LoopEnd and LoopStart at the same time loopStart->attrs().set("loop-end", loopEnd); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp 
b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp index ea4887dbe1fafe..300a9a0ccdf79a 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp @@ -17,6 +17,8 @@ using namespace InferenceEngine; +static const char importedNetworkName[] = "__importedExecutableNetworkFromBlobName"; + namespace vpu { namespace MyriadPlugin { @@ -112,8 +114,7 @@ void ExecutableNetwork::Import(std::istream& strm, return; } - // TODO: better name - char networkName[1024] = "importedNetwork"; + std::string networkName = importedNetworkName; BlobReader blobReader; blobReader.parse(_graphBlob); @@ -186,6 +187,10 @@ InferenceEngine::Parameter ExecutableNetwork::GetMetric(const std::string &name) InferenceEngine::CNNNetwork ExecutableNetwork::GetExecGraphInfo() { auto perfInfo = _executor->getPerfTimeInfo(_graphDesc._graphHandle); + if (_graphDesc._name == importedNetworkName) + THROW_IE_EXCEPTION << + "GetExecGraphInfo() can't be called for ExecutableNetwork that was imported from a compiled blob as far getting" + " original stage names, types, and topological order from the compiled blob is not implemented for now."; return buildRuntimeGraph(_graphMetaData, perfInfo); } diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h index 571d5df4a3acc7..5319e70a9de02d 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h @@ -88,8 +88,7 @@ class ExecutableNetwork : public ie::ExecutableNetworkThreadSafeDefault { auto taskExecutorGetResult = getNextTaskExecutor(); auto asyncThreadSafeImpl = std::make_shared( syncRequestImpl, _taskExecutor, _callbackExecutor, taskExecutorGetResult); - asyncRequest.reset(new ie::InferRequestBase( - asyncThreadSafeImpl), + asyncRequest.reset(new ie::InferRequestBase(asyncThreadSafeImpl), [](ie::IInferRequest *p) { p->Release(); }); asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest); return asyncRequest; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp index c36322c66aa753..4dc4edd054517f 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp @@ -280,7 +280,7 @@ void MyriadInferRequest::GetResult() { } } -void MyriadInferRequest::GetPerformanceCounts(std::map &perfMap) const { +std::map MyriadInferRequest::GetPerformanceCounts() const { auto perfInfo = _executor->getPerfTimeInfo(_graphDesc._graphHandle); if (_log->isActive(LogLevel::Info)) { @@ -289,7 +289,7 @@ void MyriadInferRequest::GetPerformanceCounts(std::map(perfInfo.size()), _config.perfReport(), _config.printReceiveTensorTime()); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.h b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.h index 9ebd729f9cd621..dbbb4fe3f17c4f 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.h @@ -52,8 +52,8 @@ class MyriadInferRequest : public InferenceEngine::InferRequestInternal { void InferAsync(); void GetResult(); - void - GetPerformanceCounts(std::map &perfMap) const override; + std::map + GetPerformanceCounts() const override; }; } // namespace MyriadPlugin diff --git 
a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index b4d3fc4ef0d913..a2ed13cb63ebee 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2019 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 # @@ -6,6 +6,7 @@ set(TARGET_NAME ieFuncTests) set(INCLUDES ${IE_MAIN_SOURCE_DIR}/src/inference_engine) + set(LINK_LIBRARIES gmock funcTestUtils @@ -16,6 +17,7 @@ set(LINK_LIBRARIES openvino::conditional_compilation sharedTestClasses ) + set(DEPENDENCIES mock_engine inference_engine_ir_reader @@ -25,18 +27,10 @@ set(DEPENDENCIES sharedTestClasses ) -if (NGRAPH_ONNX_IMPORT_ENABLE AND NOT NGRAPH_USE_PROTOBUF_LITE) - list(APPEND INCLUDES "${OpenVINO_MAIN_SOURCE_DIR}/docs/onnx_custom_op") - list(APPEND LINK_LIBRARIES onnx_custom_op) - list(APPEND DEPENDENCIES onnx_custom_op) -else() +if (NOT NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_USE_PROTOBUF_LITE) set(EXCLUDED_SOURCE_PATHS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_reader") endif() -if (NOT NGRAPH_ONNX_IMPORT_ENABLE OR NOT ENABLE_MKL_DNN OR NGRAPH_USE_PROTOBUF_LITE) - set(EXCLUDED_SOURCE_PATHS ${EXCLUDED_SOURCE_PATHS} "${CMAKE_CURRENT_SOURCE_DIR}/extension.cpp") -endif() - addIeTargetTest( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} @@ -49,15 +43,16 @@ addIeTargetTest( IE ) -ie_faster_build(${TARGET_NAME} - PCH PRIVATE "precomp.hpp" -) - -if(TARGET inference_engine_onnx_reader) +if(NGRAPH_ONNX_IMPORT_ENABLE) + target_compile_definitions(${TARGET_NAME} PRIVATE + NGRAPH_ONNX_IMPORT_ENABLE + ONNX_TEST_MODELS="${CMAKE_CURRENT_SOURCE_DIR}/onnx_reader/models/") add_dependencies(${TARGET_NAME} inference_engine_onnx_reader) endif() -target_compile_definitions(${TARGET_NAME} PRIVATE ONNX_TEST_MODELS="${CMAKE_CURRENT_SOURCE_DIR}/onnx_reader/models/") +ie_faster_build(${TARGET_NAME} + PCH PRIVATE "precomp.hpp" +) include(CMakeParseArguments) diff --git a/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp b/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp index 5c04218fa51189..0c56b14da984ac 100644 --- a/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -389,3 +391,77 @@ TEST(ConvertFunctionToCNNNetworkTests, NonUniqueNamesParametersNegative) { EXPECT_THAT(e.what(), testing::HasSubstr(std::string("Detected two output operations with the same name:"))); } } + +TEST(ConvertFunctionToCNNNetworkTests, IteratorForMemoryLayers) { + std::shared_ptr f(nullptr); + { + auto constReadVal = ngraph::opset6::Constant::create(ngraph::element::f32, {1, 37632}, {0}); + constReadVal->set_friendly_name("const"); + auto readVal = std::make_shared(constReadVal, "buffer_1"); + readVal->set_friendly_name("readVal_Buf1"); + + auto constVarSplit1 = ngraph::opset6::Constant::create(ngraph::element::i64, {}, {1}); + constVarSplit1->set_friendly_name("varSplitConst1"); + auto constVarSplit2 = ngraph::opset6::Constant::create(ngraph::element::i64, {2}, {5376, 32256}); + 
constVarSplit2->set_friendly_name("varSplitConst2"); + + auto varSplit = std::make_shared(readVal, constVarSplit1, constVarSplit2); + + auto param1 = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 5376}); + auto varConcat = std::make_shared(ngraph::OutputVector{varSplit->output(0), param1}, 1); + auto result = std::make_shared(varConcat); + + auto param2 = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 5376}); + auto varConcat2 = std::make_shared(ngraph::OutputVector{varSplit->output(1), param2}, 1); + + auto assign = std::make_shared(varConcat2, "buffer_1"); + f = std::make_shared(ngraph::ResultVector{result}, ngraph::SinkVector{assign}, ngraph::ParameterVector{param1, param2}); + } + + InferenceEngine::CNNNetwork nGraphImpl(f); + nGraphImpl = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(f, nGraphImpl)); + int memory_count(0); + IE_SUPPRESS_DEPRECATED_START + for (details::CNNNetworkIterator itLayer{nGraphImpl}; itLayer != details::CNNNetworkIterator(); itLayer++) { + if ((*itLayer)->type == "Memory") + memory_count++; + } + IE_SUPPRESS_DEPRECATED_END + ASSERT_EQ(2, memory_count); +} + +TEST(ConvertFunctionToCNNNetworkTests, IteratorForMemoryLayers2) { + std::shared_ptr f(nullptr); + { + auto constReadVal = ngraph::opset6::Constant::create(ngraph::element::f32, {1, 37632}, {0}); + constReadVal->set_friendly_name("const"); + auto readVal = std::make_shared(constReadVal, "buffer_1"); + readVal->set_friendly_name("readVal_Buf1"); + + auto constVarSplit1 = ngraph::opset6::Constant::create(ngraph::element::i64, {}, {1}); + constVarSplit1->set_friendly_name("varSplitConst1"); + auto constVarSplit2 = ngraph::opset6::Constant::create(ngraph::element::i64, {2}, {5376, 32256}); + constVarSplit2->set_friendly_name("varSplitConst2"); + + auto varSplit = std::make_shared(readVal, constVarSplit1, constVarSplit2); + + auto param2 = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 5376}); + auto varConcat2 = std::make_shared(ngraph::OutputVector{varSplit->output(1), param2}, 1); + + auto assign = std::make_shared(varConcat2, "buffer_1"); + f = std::make_shared(ngraph::ResultVector{}, ngraph::SinkVector{assign}, ngraph::ParameterVector{param2}); + } + + InferenceEngine::CNNNetwork nGraphImpl(f); + nGraphImpl = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(f, nGraphImpl)); + int memory_count(0); + IE_SUPPRESS_DEPRECATED_START + for (details::CNNNetworkIterator itLayer{nGraphImpl}; itLayer != details::CNNNetworkIterator(); itLayer++) { + if ((*itLayer)->type == "Memory") + memory_count++; + } + IE_SUPPRESS_DEPRECATED_END + ASSERT_EQ(2, memory_count); +} + + diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp index 43c1eb4a12ac2f..e2e633e857c66b 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp @@ -58,6 +58,8 @@ TEST_F(CustomOpsSerializationTest, CustomOpUser_MO) { ASSERT_TRUE(success) << message; } +#ifdef NGRAPH_ONNX_IMPORT_ENABLE + TEST_F(CustomOpsSerializationTest, CustomOpUser_ONNXImporter) { const std::string model = IR_SERIALIZATION_MODELS_PATH "custom_op.prototxt"; @@ -78,6 +80,8 @@ TEST_F(CustomOpsSerializationTest, CustomOpUser_ONNXImporter) { ASSERT_TRUE(success) << message; } +#endif + TEST_F(CustomOpsSerializationTest, CustomOpTransformation) { const std::string model = 
IR_SERIALIZATION_MODELS_PATH "custom_op.xml"; diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/deterministicity.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/deterministicity.cpp index 2b087bc4f40873..dd99fea97bf5c9 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/deterministicity.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/deterministicity.cpp @@ -46,6 +46,8 @@ class SerializationDeterministicityTest : public ::testing::Test { } }; +#ifdef NGRAPH_ONNX_IMPORT_ENABLE + TEST_F(SerializationDeterministicityTest, BasicModel) { const std::string model = IR_SERIALIZATION_MODELS_PATH "add_abc.prototxt"; @@ -63,14 +65,12 @@ TEST_F(SerializationDeterministicityTest, BasicModel) { ASSERT_TRUE(files_equal(bin_1, bin_2)); } -TEST_F(SerializationDeterministicityTest, ModelWithMultipleOutputs) { +TEST_F(SerializationDeterministicityTest, ModelWithMultipleLayers) { const std::string model = - IR_SERIALIZATION_MODELS_PATH "split_equal_parts_2d.xml"; - const std::string weights = - IR_SERIALIZATION_MODELS_PATH "split_equal_parts_2d.bin"; + IR_SERIALIZATION_MODELS_PATH "addmul_abc.prototxt"; InferenceEngine::Core ie; - auto expected = ie.ReadNetwork(model, weights); + auto expected = ie.ReadNetwork(model); expected.serialize(m_out_xml_path_1, m_out_bin_path_1); expected.serialize(m_out_xml_path_2, m_out_bin_path_2); @@ -83,12 +83,16 @@ TEST_F(SerializationDeterministicityTest, ModelWithMultipleOutputs) { ASSERT_TRUE(files_equal(bin_1, bin_2)); } -TEST_F(SerializationDeterministicityTest, ModelWithMultipleLayers) { +#endif + +TEST_F(SerializationDeterministicityTest, ModelWithMultipleOutputs) { const std::string model = - IR_SERIALIZATION_MODELS_PATH "addmul_abc.prototxt"; + IR_SERIALIZATION_MODELS_PATH "split_equal_parts_2d.xml"; + const std::string weights = + IR_SERIALIZATION_MODELS_PATH "split_equal_parts_2d.bin"; InferenceEngine::Core ie; - auto expected = ie.ReadNetwork(model); + auto expected = ie.ReadNetwork(model, weights); expected.serialize(m_out_xml_path_1, m_out_bin_path_1); expected.serialize(m_out_xml_path_2, m_out_bin_path_2); diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/models/add_abc_bin.xml b/inference-engine/tests/functional/inference_engine/ir_serialization/models/add_abc_bin.xml new file mode 100644 index 00000000000000..e89f176d14c783 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/models/add_abc_bin.xml @@ -0,0 +1,73 @@ + + + + + + + + 1 + + + + + + + + 1 + + + + + + + 1 + + + 1 + + + + + 1 + + + + + + + + 1 + + + + + + + 1 + + + 1 + + + + + 1 + + + + + + + 1 + + + + + + + + + + + + diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.bin b/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.bin new file mode 100644 index 00000000000000..59fda1f234d648 Binary files /dev/null and b/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.bin differ diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.xml b/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.xml new file mode 100644 index 00000000000000..e088edb10ab949 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/models/nms5_dynamism.xml @@ -0,0 +1,91 @@ + + + + + + + + 1 + 1 + 1000 + + + + + + + 
+ 1 + 1000 + 4 + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 1000 + 4 + + + 1 + 1 + 1000 + + + + + + + + 10 + 3 + + + 10 + 3 + + + 1 + + + + + + + + 10 + 3 + + + + + + + + + + + + + diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/serialize.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/serialize.cpp index 5480ecfd378e22..067ed94bc7dc06 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/serialize.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/serialize.cpp @@ -53,13 +53,14 @@ TEST_P(SerializationTest, CompareFunctions) { bool success; std::string message; - std::tie(success, message) = compare_functions(result.getFunction(), expected.getFunction(), true, false, true); + std::tie(success, message) = compare_functions(result.getFunction(), expected.getFunction(), true, false, true, true, true); ASSERT_TRUE(success) << message; } INSTANTIATE_TEST_CASE_P(IRSerialization, SerializationTest, testing::Values(std::make_tuple("add_abc.xml", "add_abc.bin"), std::make_tuple("add_abc_f64.xml", ""), + std::make_tuple("add_abc_bin.xml", ""), std::make_tuple("split_equal_parts_2d.xml", "split_equal_parts_2d.bin"), std::make_tuple("addmul_abc.xml", "addmul_abc.bin"), std::make_tuple("add_abc_initializers.xml", "add_abc_initializers.bin"), @@ -71,10 +72,15 @@ INSTANTIATE_TEST_CASE_P(IRSerialization, SerializationTest, std::make_tuple("shape_of.xml", ""), std::make_tuple("pad_with_shape_of.xml", ""), std::make_tuple("conv_with_rt_info.xml", ""), - std::make_tuple("loop_2d_add.xml", "loop_2d_add.bin"))); + std::make_tuple("loop_2d_add.xml", "loop_2d_add.bin"), + std::make_tuple("nms5_dynamism.xml", "nms5_dynamism.bin"))); + +#ifdef NGRAPH_ONNX_IMPORT_ENABLE INSTANTIATE_TEST_CASE_P(ONNXSerialization, SerializationTest, testing::Values(std::make_tuple("add_abc.prototxt", ""), std::make_tuple("split_equal_parts_2d.prototxt", ""), std::make_tuple("addmul_abc.prototxt", ""), std::make_tuple("add_abc_initializers.prototxt", ""))); + +#endif diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp index 79a7a3d5dc5247..cd6ac9fd8a466f 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp @@ -40,7 +40,7 @@ class SerializationTensorIteratorTest : public ::testing::Test { bool success; std::string message; - std::tie(success, message) = compare_functions(result.getFunction(), expected.getFunction(), true); + std::tie(success, message) = compare_functions(result.getFunction(), expected.getFunction(), true, false, false, true, true); ASSERT_TRUE(success) << message; } }; diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_names.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_names.cpp new file mode 100644 index 00000000000000..0a57affd010457 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_names.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include "common_test_utils/ngraph_test_utils.hpp" +#include "ie_core.hpp" +#include "ngraph/ngraph.hpp" +#include "transformations/serialize.hpp" +#include + +class 
TensorNameSerializationTest : public CommonTestUtils::TestsCommon { +protected: + std::string test_name = GetTestName() + "_" + GetTimestamp(); + std::string m_out_xml_path = test_name + ".xml"; + std::string m_out_bin_path = test_name + ".bin"; + + void TearDown() override { + std::remove(m_out_xml_path.c_str()); + std::remove(m_out_bin_path.c_str()); + } +}; + +TEST_F(TensorNameSerializationTest, SerializeFunctionWithTensorNames) { + InferenceEngine::Core ie; + + std::shared_ptr function; + { + auto parameter = std::make_shared(ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 10, 10}); + parameter->set_friendly_name("parameter"); + parameter->get_output_tensor(0).set_names({"input"}); + auto relu_prev = std::make_shared(parameter); + relu_prev->set_friendly_name("relu_prev"); + relu_prev->get_output_tensor(0).set_names({"relu_prev_t", "identity_prev_t"}); + auto relu = std::make_shared(relu_prev); + relu->set_friendly_name("relu"); + relu->get_output_tensor(0).set_names({"relu,t", "identity"}); + const ngraph::ResultVector results{std::make_shared(relu)}; + results[0]->set_friendly_name("out"); + ngraph::ParameterVector params{parameter}; + function = std::make_shared(results, params, "TensorNames"); + } + + InferenceEngine::CNNNetwork expected(function); + expected.serialize(m_out_xml_path, m_out_bin_path); + auto result = ie.ReadNetwork(m_out_xml_path, m_out_bin_path); + + bool success; + std::string message; + std::tie(success, message) = + compare_functions(result.getFunction(), expected.getFunction(), true, true, true, true); + + ASSERT_TRUE(success) << message; +} diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp index de7acf9f799ac8..c60e041fcb358d 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -195,7 +195,37 @@ const std::vector addTransformationTestValues = { }, "" }, - // U8 + + // Actual: + // + // Parameter Parameter + // |U8 |U8 + // | | + // Convert Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Add + // Transformed: + // + // Parameter + // |U8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply Parameter + // \FP32 /U8 + // \ / + // Add { ngraph::element::f32, ngraph::Shape{1, 4, 16, 16}, @@ -219,6 +249,68 @@ const std::vector addTransformationTestValues = { }, "" }, + + // Actual: + // + // Parameter Constant Parameter Constant + // |U8 |U8 |U8 |U8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Add + // Transformed: + // + // Parameter + // |U8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply Parameter + // \FP32 /U8 + // \ / + // Add + { + ngraph::element::f32, + ngraph::Shape{1, 4, 16, 16}, + false, + -1, + LayerTransformation::createParamsU8I8(), + { + 
ngraph::element::u8, + { + {ngraph::element::f32}, + { {7.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true }, + { 10.f } + }, + ngraph::element::u8, + { + {ngraph::element::f32}, + { {3.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true }, + { 5.f } + }, + {} + }, + { + ngraph::element::u8, + { {ngraph::element::f32}, { 8.5f }, { 2.f }}, + ngraph::element::u8, + { {}, {}, {} }, + { {}, {}, {5.f} }, + {} + }, + "" + }, { ngraph::element::f32, ngraph::Shape{1, 4, 16, 16}, @@ -554,6 +646,126 @@ const std::vector addTransformationTestValues = { }, "group_convolution" }, + + // Actual: + // + // Parameter Parameter Constant + // |U8 |U8 |U8 + // | | | + // Convert Constant Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Add + // Transformed: + // + // Parameter + // |U8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + { + ngraph::element::f32, + ngraph::Shape{1, 4, 16, 16}, + false, + 1, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {7.f}, + { 10.f } + }, + ngraph::element::u8, + { + {ngraph::element::f32}, + { {3.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true }, + { 5.f } + }, + {10.f} + }, + { + ngraph::element::u8, + { {ngraph::element::f32}, {}, {}}, + ngraph::element::u8, + { }, + { {}, {}, {10.f} }, + {3.5f}, + "Subtract" + }, + "" + }, + + // Actual: + // + // Constant Constant Parameter + // |U8 |U8 |U8 + // | | | + // Convert Convert Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Add + // Transformed: + // + // Parameter + // |U8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + { + ngraph::element::f32, + ngraph::Shape{1, 4, 16, 16}, + false, + 0, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + { + {ngraph::element::f32}, + { {7.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true }, + { 10.f } + }, + ngraph::element::u8, + { + {ngraph::element::f32}, + { 3.f }, + { 5.f } + }, + { 10.f } + }, + { + ngraph::element::u8, + { {ngraph::element::f32}, {}, {}}, + ngraph::element::u8, + { }, + { {}, {}, { 5.f } }, + { -3.f }, + "Subtract" + }, + "" + }, }; INSTANTIATE_TEST_CASE_P( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/clamp_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/clamp_transformation.cpp index 0530d526faf322..1c96e55bcf712d 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/clamp_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/clamp_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -79,9 +79,7 @@ class ClampTransformation : public LayerTransformation, public testing::WithPara }; TEST_P(ClampTransformation, CompareFunctions) { - InitNodeInfo().run_on_function(actualFunction); actualFunction->validate_nodes_and_infer_types(); - auto res = compare_functions(referenceFunction, actualFunction, true, true); ASSERT_TRUE(res.first) << 
res.second; } @@ -104,6 +102,31 @@ const std::vector testValues = { {{}, {128.f}, {3.f}} } }, + // U8 per tensor quantization + { + ngraph::Shape({ 1, 3, 224, 224 }), + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{128.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + {3.f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {}, {}}, + ngraph::element::f32, + { + {}, + {{128.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + {3.f} + } + } + }, // I8 per tensor quantization { ngraph::Shape({ 1, 3, 224, 224 }), diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/compose_fake_quantize_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/compose_fake_quantize_transformation.cpp new file mode 100644 index 00000000000000..039e96a7fedff5 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/compose_fake_quantize_transformation.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/compose_fake_quantize_function.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" + +using namespace testing; +using namespace ngraph::pass; +using namespace ngraph::builder::subgraph; + +class ComposeFakeQuantizeTransformationParams { +public: + class Values { + public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize; + ngraph::builder::subgraph::DequantizationOperations dequantization1; + ngraph::builder::subgraph::DequantizationOperations dequantization2; + }; + + ngraph::element::Type originalPrecision; + Values actual; + Values expected; +}; + +typedef std::tuple< + ngraph::Shape, + ComposeFakeQuantizeTransformationParams> ComposeFakeQuantizeTransformationValues; + +class ComposeFakeQuantizeTransformation : + public LayerTransformation, + public testing::WithParamInterface { +public: + void SetUp() override { + const auto inputShape = std::get<0>(GetParam()); + const auto testValues = std::get<1>(GetParam()); + actualFunction = ngraph::builder::subgraph::ComposeFakeQuantizeFunction::get( + testValues.originalPrecision, + inputShape, + testValues.actual.fakeQuantize, + testValues.actual.dequantization1, + testValues.actual.dequantization2); + + const auto input = actualFunction->get_parameters()[0]; + const auto fakeQuantizes = input->output(0).get_target_inputs(); + const auto it = fakeQuantizes.begin(); + const auto fakeQuantize = ngraph::as_type_ptr(it->get_node()->shared_from_this()); + low_precision::NetworkHelper::composeFakeQuantize(fakeQuantize); + + referenceFunction = ngraph::builder::subgraph::ComposeFakeQuantizeFunction::get( + testValues.originalPrecision, + inputShape, + testValues.expected.fakeQuantize, + testValues.expected.dequantization1, + testValues.expected.dequantization2); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const auto inputShape = std::get<0>(obj.param); + const auto testValues = std::get<1>(obj.param); + + std::ostringstream result; + result << + testValues.originalPrecision << "_" << + inputShape << "_" << + testValues.actual.fakeQuantize << "_" << + 
testValues.actual.dequantization1 << "_" << + testValues.actual.dequantization2 << "_" << + testValues.expected.fakeQuantize << "_" << + testValues.expected.dequantization1 << "_" << + testValues.expected.dequantization2; + return result.str(); + } +}; + +TEST_P(ComposeFakeQuantizeTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, false, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector inputShapes = { + { 1, 3, 16, 16 }, + { 4, 3, 16, 16 } +}; + +const std::vector testValues = { + { + ngraph::element::f32, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 255.f } }, + { {ngraph::element::f32}, {}, { 0.01f } }, + {} + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, + {}, + {} + }, + }, + { + ngraph::element::f32, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -128.f }, { 127.f } }, + { {ngraph::element::f32}, {-128}, { 0.01f } }, + {} + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, + {}, + {} + }, + }, + { + ngraph::element::f32, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -128.f }, { 127.f } }, + { {ngraph::element::f32}, {-128}, { 0.01f } }, + { {ngraph::element::f32}, {-128}, { 0.01f } } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -128.f }, { 127.f } }, + { {ngraph::element::f32}, {-128}, { 0.01f } }, + { {ngraph::element::f32}, {-128}, { 0.01f } } + }, + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ComposeFakeQuantizeTransformation, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(testValues)), + ComposeFakeQuantizeTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp index 79c2c507b7b1ba..6c90dd29253095 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -30,25 +30,44 @@ namespace { class ConcatTransformationActualValues { public: ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantize1; + ngraph::builder::subgraph::DequantizationOperations::Convert convert1; + ngraph::builder::subgraph::DequantizationOperations dequantization1; ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantize2; + ngraph::builder::subgraph::DequantizationOperations::Convert convert2; + ngraph::builder::subgraph::DequantizationOperations dequantization2; }; inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationActualValues& values) { - return out << "_" << values.fakeQuantize1 << "_" << values.fakeQuantize2; + return out << "_" << + values.fakeQuantize1 << "_" << + values.convert1.outPrecision << "_" << + values.dequantization1 << "_" << + values.fakeQuantize2 << "_" << + values.convert2.outPrecision << "_" << + values.dequantization2; } class ConcatTransformationResultValues { public: ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantize1; + ngraph::builder::subgraph::DequantizationOperations::Convert convert1; + ngraph::builder::subgraph::DequantizationOperations dequantization1; ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantize2; - 
ngraph::element::Type precisionBeforeOp; - ngraph::builder::subgraph::DequantizationOperations dequantizationOperations1; + ngraph::builder::subgraph::DequantizationOperations::Convert convert2; + ngraph::builder::subgraph::DequantizationOperations dequantization2; ngraph::element::Type precisionAfterOperation; - ngraph::builder::subgraph::DequantizationOperations dequantizationOperations2; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; }; inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationResultValues& values) { - return out << "_" << values.fakeQuantize1 << "_" << values.fakeQuantize2 << "_" << values.dequantizationOperations2; + return out << "_" << + values.fakeQuantize1 << "_" << + values.convert1.outPrecision << "_" << + values.dequantization1 << "_" << + values.fakeQuantize2 << "_" << + values.convert2.outPrecision << "_" << + values.dequantization2 << "_" << + values.dequantizationAfter; } class ConcatTransformationTestValues { @@ -76,11 +95,17 @@ class ConcatTransformation : public LayerTransformation, public testing::WithPar const ngraph::Shape shape = std::get<1>(GetParam()); ConcatTransformationTestValues testValues = std::get<2>(GetParam()); - actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginal( + actualFunction = ngraph::builder::subgraph::ConcatFunction::get( precision, shape, testValues.actual.fakeQuantize1, - testValues.actual.fakeQuantize2); + testValues.actual.convert1, + testValues.actual.dequantization1, + testValues.actual.fakeQuantize2, + testValues.actual.convert2, + testValues.actual.dequantization2, + ngraph::element::undefined, + {}); SimpleLowPrecisionTransformer transform; if (testValues.multiChannels) { @@ -90,15 +115,17 @@ class ConcatTransformation : public LayerTransformation, public testing::WithPar } transform.transform(actualFunction); - referenceFunction = ngraph::builder::subgraph::ConcatFunction::getReference( + referenceFunction = ngraph::builder::subgraph::ConcatFunction::get( precision, shape, testValues.result.fakeQuantize1, + testValues.result.convert1, + testValues.result.dequantization1, testValues.result.fakeQuantize2, - testValues.result.precisionBeforeOp, - testValues.result.dequantizationOperations1, + testValues.result.convert2, + testValues.result.dequantization2, testValues.result.precisionAfterOperation, - testValues.result.dequantizationOperations2); + testValues.result.dequantizationAfter); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -134,15 +161,81 @@ const std::vector testValues = { false, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} } }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + ngraph::element::u8, + { ngraph::element::f32, {}, { 0.01f } }, + } + }, + // U8: concat + { + LayerTransformation::createParamsU8I8(), + false, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, - { {}, {}, {} }, + { ngraph::element::f32, {}, { 0.01f } } + } + }, + // U8: concat + { + 
LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, - { ngraph::element::f32, {}, { 0.01f } }, + { ngraph::element::f32, {}, { 0.01f } } } }, // U8: concat @@ -150,14 +243,74 @@ const std::vector testValues = { LayerTransformation::createParamsU8I8(), false, { - { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} } + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + ngraph::element::u8, + { ngraph::element::f32, {}, { 0.01f } } + } + }, + // U8: concat + { + LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { ngraph::element::u8 }, + { + { element::f32 }, + {}, + { 0.01f } + }, }, { - { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f} }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, - { {}, {}, {} }, + { ngraph::element::f32, {}, { 0.01f } } + } + }, + // U8: concat + { + LayerTransformation::createParamsU8I8(), + false, + { + { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {} + }, + { + { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {}, { 0.01f } } } @@ -168,13 +321,19 @@ const std::vector testValues = { false, { { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {0.f}, {2.55f}, {0.f}, {2.55f} } + {}, + {}, + { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {} }, { - { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {}, { 0.01f } } } @@ -185,13 +344,19 @@ const std::vector testValues = { true, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {}, {0.f}, {1.275f}, {0.f}, {1.275f} } + {}, + {}, + { 256ul, {}, {0.f}, {1.275f}, {0.f}, {1.275f} }, + {}, + {} }, { 
- { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {}, {0.f}, {1.275f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {1.275f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} } } @@ -202,13 +367,19 @@ const std::vector testValues = { true, { { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {1.275f}, {0.f}, {1.275f} } + {}, + {}, + { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {1.275f}, {0.f}, {1.275f} }, + {}, + {} }, { - { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {{1}, {1}, {}, {}}, {0.f}, {1.275f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {{1}, {1}, {}, {}}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {{1}, {1}, {}, {}}, {0.f}, {1.275f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} } } @@ -224,12 +395,16 @@ const std::vector testValues = { {0.f, 0.f, 0.f}, {2.55f, 2.55f, 2.55f}, {0.f, 0.f, 0.f}, {2.55f / 1.f, 2.55f / 2.f, 2.55f / 3.f}, ngraph::element::f32 }, + {}, + {}, { 256ul, {{1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}}, {0.f, 0.f, 0.f}, {1.275f, 1.275f, 1.275f}, {0.f, 0.f, 0.f}, {1.275f / 1.f, 1.275f / 2.f, 1.275f / 3.f}, ngraph::element::f32 - } + }, + {}, + {} }, { { @@ -238,14 +413,16 @@ const std::vector testValues = { {0.f, 0.f, 0.f}, {2.55f, 2.55f, 2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, { 256ul, {{1, 3, 1, 1}, {1, 3, 1, 1}, {}, {}}, {0.f, 0.f, 0.f}, {1.275f, 1.275f, 1.275f}, {0.f}, {255.f}, ngraph::element::u8 }, - ngraph::element::u8, - { {}, {}, {} }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {}, {{ 0.01f / 1.f, 0.01f / 2.f, 0.01f / 3.f, 0.005f / 1.f, 0.005f / 2.f, 0.005f / 3.f }} } } @@ -256,13 +433,19 @@ const std::vector testValues = { true, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} } + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, + {}, + {} }, { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {}, {1.275f}, {2.55f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, @@ -277,13 +460,19 @@ const std::vector testValues = { false, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} } + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + {}, + {} }, { - { 256ul, {}, {-1.28f}, {1.27f}, {-128.f}, {127.f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {-128.f}, {127.f} }, - ngraph::element::i8, - { {}, {}, {} }, + { 256ul, {}, {-1.28f}, {1.27f}, {-128.f}, {127.f}, ngraph::element::i8 }, + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {-128.f}, {127.f}, ngraph::element::i8 }, + {}, + {}, ngraph::element::i8, { ngraph::element::f32, {}, { 0.01f } } } @@ -294,13 +483,19 @@ const std::vector testValues = { false, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} } + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + {}, + {} }, { - { 256ul, 
{}, {0.f}, {2.55f}, {85.f}, {255.f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {170.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {0.f}, {2.55f}, {85.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {170.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, { 85 }, { 0.015f } } } @@ -311,13 +506,19 @@ const std::vector testValues = { true, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} } + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + {}, + {} }, { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, {{ 0.f, 0.f, 0.f, 128.f, 128.f, 128.f }}, { 0.01f } } } @@ -328,13 +529,19 @@ const std::vector testValues = { false, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} } + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {} }, { - { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {170.f} }, - { 256ul, {}, {0.f}, {2.55f}, {85.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {170.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {85.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, { 85 }, { 0.015f } } } @@ -345,13 +552,19 @@ const std::vector testValues = { false, { { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {2.3007815f} }, - { 256ul, {}, {0.f}, {2.55f}, {-3.873046875f}, {3.84375} } + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {-3.873046875f}, {3.84375} }, + {}, + {} }, { - { 256ul, {}, {-1.28f}, {1.27f}, {128.f}, {204.f} }, - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - ngraph::element::u8, - { {}, {}, {} }, + { 256ul, {}, {-1.28f}, {1.27f}, {128.f}, {204.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, ngraph::element::u8, { ngraph::element::f32, { 128 }, { 0.0302619f } } } @@ -362,17 +575,23 @@ const std::vector testValues = { false, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} } + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {} }, { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, - ngraph::element::f32, - { {}, {}, {} }, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::f32 }, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::f32 }, + {}, + {}, ngraph::element::f32, { {}, {}, { 0.01f } }, } - }, + } }; const std::vector shapes = { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp new file mode 100644 index 00000000000000..7937d884ae2fa2 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp @@ -0,0 +1,415 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include 
+#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class ConvolutionQDqTransformationTestValues { +public: + class Values { + public: + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; + ngraph::builder::subgraph::Constant weights; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::element::Type precisionAfterOperation; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + }; + + ngraph::pass::low_precision::LayerTransformation::Params params; + Values actual; + Values expected; +}; + +typedef std::tuple< + ngraph::Shape, + ConvolutionQDqTransformationTestValues> ConvolutionQDqTransformationParams; + +class ConvolutionQDqTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const auto inputShape = std::get<0>(GetParam()); + const auto testValues = std::get<1>(GetParam()); + + actualFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + testValues.actual.precisionBeforeDequantization, + inputShape, + {}, + {}, + testValues.actual.dequantizationOnActivations, + testValues.actual.weights, + testValues.actual.fakeQuantizeOnWeights, + {}, + testValues.actual.dequantizationOnWeights, + testValues.actual.dequantizationAfter); + + SimpleLowPrecisionTransformer transform; + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + testValues.actual.precisionBeforeDequantization, + inputShape, + {}, + {}, + testValues.expected.dequantizationOnActivations, + testValues.expected.weights, + testValues.expected.fakeQuantizeOnWeights, + {}, + testValues.expected.dequantizationOnWeights, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + auto inputShape = std::get<0>(obj.param); + ConvolutionQDqTransformationTestValues testValues = std::get<1>(obj.param); + + std::ostringstream result; + result << toString(testValues.params) << "_" << + inputShape << "_" << + testValues.actual.precisionBeforeDequantization << "_" << + testValues.actual.dequantizationOnActivations << "_" << "_weights_" << + testValues.actual.weights.outPrecision << "_" << "{ " << + testValues.actual.weights.values[0] << " }_" << + testValues.actual.fakeQuantizeOnWeights << "_" << + testValues.actual.dequantizationOnWeights; + return result.str(); + } +}; + +TEST_P(ConvolutionQDqTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector shapes = { + ngraph::Shape({ 1, 3, 72, 48 }), + ngraph::Shape({ 4, 3, 72, 48 }) +}; + +const std::vector testValues = { + // Actual: + // Constant + // |FP32 Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // Parameter Constant FakeQuantize Constant + // |U8 |U8 |I8 /I8 + // | | | / + // Convert Convert Convert Convert + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 
|FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant + // \U8 /U8 + // \ / + // Subtract Constant + // \FP32 /I8 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 1.f }, ngraph::element::f32}, + { 255ul, Shape({ 1, 1, 1, 1 }), { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + { { 127.f }, ngraph::element::f32, { 1, 3, 1, 1 }, false }, + {} + }, + { + {}, + { { 127.f }, ngraph::element::f32, { 6, 1, 1, 1 }, false, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, + {} + }, + { std::vector{ 100.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0006f }, ngraph::element::f32, {1}}} + } + }, + + // Actual: + // + // Parameter Constant + // |U8 |U8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant Constant + // \FP32 /FP32 |FP32 Constant Constant Constant Constant + // \ / | /FP32 /FP32 /FP32 /FP32 + // Multiply FakeQuantize + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant + // \U8 /U8 + // \ / + // Subtract Constant + // \FP32 /I8 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }}, + {}, + { std::vector{ 2.f }, ngraph::element::f32}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 127.f }, ngraph::element::f32, { 1, 3, 1, 1 }, false }, {}}, + {}, + { std::vector{ -125.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1 }}} + } + }, + + // Actual & Transformed: + // + // Parameter Constant Constant Constant + // |U8 |U8 |FP32 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }}, + {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }}, + { std::vector{ 2.f }, ngraph::element::f32}, + {}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }}, + {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }}, + { std::vector{ 2.f }, ngraph::element::f32}, + {}, + ngraph::element::f32, + {} + } + }, + + // 
Actual: + // + // Parameter Constant Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant Constant Constant + // \U8 /U8 |I8 /I8 + // \ / | / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + { { 127.f }, ngraph::element::f32, { 1, 3, 1, 1 }, false }, + {} + }, + { + {}, + { { 127.f }, ngraph::element::f32, { 6, 1, 1, 1 }, false, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, + {} + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0006f }, ngraph::element::f32, { 1 }}} + } + }, + + // Actual: + // + // Parameter Constant Constant + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant Constant Constant + // \U8 /U8 |I8 /I8 + // \ / | / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + { { 127.f }, ngraph::element::f32, { 1, 3, 1, 1 }, false }, + {} + }, + { + {}, + { { 127.f }, ngraph::element::f32, { 6, 1, 1, 1 }, false, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, + {} + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0006f }, ngraph::element::f32, { 1 }}} + } + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ConvolutionQDqTransformation, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + ConvolutionQDqTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp index ef8be7655b3ccd..f524d7a0402ad1 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp +++ 
b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -27,7 +27,7 @@ class ConvolutionTransformationTestValues { class Actual { public: ngraph::element::Type precisionBeforeDequantization; - ngraph::builder::subgraph::DequantizationOperations dequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; std::shared_ptr weights; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; }; @@ -61,7 +61,7 @@ class ConvolutionTransformation : public LayerTransformation, public testing::Wi actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal( testValues.actual.precisionBeforeDequantization, inputShape, - testValues.actual.dequantization, + testValues.actual.dequantizationOnActivations, testValues.actual.weights, testValues.actual.fakeQuantizeOnWeights); @@ -70,14 +70,14 @@ class ConvolutionTransformation : public LayerTransformation, public testing::Wi transform.transform(actualFunction); referenceFunction = ngraph::builder::subgraph::ConvolutionFunction::getReference( - testValues.expected.precisionBeforeDequantization, - inputShape, - testValues.expected.dequantizationBefore, - testValues.expected.weights, - testValues.expected.fakeQuantizeOnWeights, - testValues.expected.precisionAfterOperation, - testValues.expected.dequantizationAfter, - testValues.expected.precisionAfterDequantization); + testValues.expected.precisionBeforeDequantization, + inputShape, + testValues.expected.dequantizationBefore, + testValues.expected.weights, + testValues.expected.fakeQuantizeOnWeights, + testValues.expected.precisionAfterOperation, + testValues.expected.dequantizationAfter, + testValues.expected.precisionAfterDequantization); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -88,7 +88,7 @@ class ConvolutionTransformation : public LayerTransformation, public testing::Wi result << toString(testValues.params) << "_" << inputShape << "_" << testValues.actual.precisionBeforeDequantization << "_" << - testValues.actual.dequantization << "_" << "_weights_" << + testValues.actual.dequantizationOnActivations << "_" << "_weights_" << testValues.actual.weights->get_element_type() << "_" << "{ " << testValues.actual.weights->cast_vector()[0] << " }_" << testValues.actual.fakeQuantizeOnWeights << "_"; diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_with_incorrect_weights.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_with_incorrect_weights.cpp index 15e58bf3120266..e30ca586b4a7c9 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_with_incorrect_weights.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_with_incorrect_weights.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -10,7 +10,7 @@ #include #include -#include +#include #include "common_test_utils/ngraph_test_utils.hpp" #include "lpt_ngraph_functions/common/dequantization_operations.hpp" @@ -26,23 +26,20 @@ class ConvolutionWIthIncorrectWeightsTestValues { public: class Actual { public: - ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData; + ngraph::builder::subgraph::DequantizationOperations 
dequantization; ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; }; class Expected { public: - ngraph::element::Type dataPrecision; - ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData; ngraph::builder::subgraph::DequantizationOperations dequantizationBefore; ngraph::element::Type weightsPrecision; std::vector weightsValues; - ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; }; + ngraph::element::Type inputPrecision; ngraph::Shape inputShape; - ngraph::element::Type precision; ngraph::pass::low_precision::LayerTransformation::Params params; bool isCorrect; Actual actual; @@ -58,25 +55,22 @@ class ConvolutionWIthIncorrectWeightsTransformation : actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginalWithIncorrectWeights( testValues.inputShape, - testValues.precision, + testValues.inputPrecision, testValues.actual.fakeQuantizeOnWeights, - testValues.actual.fakeQuantizeOnData, + testValues.actual.dequantization, testValues.isCorrect); SimpleLowPrecisionTransformer transform; transform.add(testValues.params); - transform.add(testValues.params); + transform.add(testValues.params); transform.transform(actualFunction); referenceFunction = ngraph::builder::subgraph::ConvolutionFunction::getReferenceWithIncorrectWeights( testValues.inputShape, - testValues.precision, - testValues.expected.dataPrecision, - testValues.expected.fakeQuantizeOnData, + testValues.inputPrecision, testValues.expected.dequantizationBefore, testValues.expected.weightsPrecision, testValues.expected.weightsValues, - testValues.expected.fakeQuantizeOnWeights, testValues.expected.dequantizationAfter, testValues.isCorrect); } @@ -102,42 +96,36 @@ TEST_P(ConvolutionWIthIncorrectWeightsTransformation, CompareFunctions) { const std::vector testValues = { // incorrect weights { + ngraph::element::u8, ngraph::Shape({ 1, 3, 224, 224 }), - ngraph::element::f32, LayerTransformation::createParamsU8I8(), - bool{ false }, + false, { - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, + {ngraph::element::f32, {}, {0.1f}}, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, }, { - ngraph::element::u8, - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }, - {{ngraph::element::f32}, {}, {0.1f}}, + {ngraph::element::f32, {}, {0.1f}}, ngraph::element::f32, - {1.f}, - { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + {-126.f}, {} }, }, // correct weights { + ngraph::element::u8, ngraph::Shape({ 1, 3, 224, 224 }), - ngraph::element::f32, LayerTransformation::createParamsU8I8(), true, { - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, + {ngraph::element::f32, {}, {0.1f}}, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, }, { - ngraph::element::u8, - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }, {}, ngraph::element::i8, {-126.f}, - {}, - {{}, {}, {0.1f}}, + {{}, {}, {{ 0.1f }, ngraph::element::f32, { 1, 1, 1 }}}, }, }, }; diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/depth_to_space_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/depth_to_space_transformation.cpp index 6d880a3ecb2e34..9fd98075bbb644 100644 --- 
a/inference-engine/tests/functional/inference_engine/lp_transformations/depth_to_space_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/depth_to_space_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -116,6 +116,31 @@ const std::vector testValues = { {{ngraph::element::f32}, {0.32f}, {0.45f}} } }, + // blockSize = 2 + { + ngraph::Shape{ 1, 4, 3, 3 }, + DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST, + 2, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{0.32f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + {0.45f} + } + }, + { + ngraph::element::u8, + {{}, {}, {}}, + ngraph::element::u8, + { + {ngraph::element::f32}, + {{0.32f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + {0.45f} + } + } + }, // blockSize = 3 { ngraph::Shape{ 1, 9, 3, 3 }, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/disable_convert_on_const_path_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/disable_convert_on_const_path_transformation.cpp new file mode 100644 index 00000000000000..ceed0209b71d64 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/disable_convert_on_const_path_transformation.cpp @@ -0,0 +1,171 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class DisableConvertOnConstPathTransformationValues { +public: + class Values { + public: + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; + ngraph::builder::subgraph::Constant weights; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::element::Type precisionAfterOperation; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + }; + + Values actual; + Values expected; +}; + +typedef std::tuple< + ngraph::Shape, + DisableConvertOnConstPathTransformationValues> DisableConvertOnConstPathTransformationParams; + +class DisableConvertOnConstPathTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const auto inputShape = std::get<0>(GetParam()); + const auto testValues = std::get<1>(GetParam()); + + actualFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + testValues.actual.precisionBeforeDequantization, + inputShape, + {}, + {}, + testValues.actual.dequantizationOnActivations, + testValues.actual.weights, + testValues.actual.fakeQuantizeOnWeights, + {}, + testValues.actual.dequantizationOnWeights, + testValues.actual.dequantizationAfter); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(actualFunction); + + referenceFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + 
testValues.actual.precisionBeforeDequantization, + inputShape, + {}, + {}, + testValues.expected.dequantizationOnActivations, + testValues.expected.weights, + testValues.expected.fakeQuantizeOnWeights, + {}, + testValues.expected.dequantizationOnWeights, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + auto inputShape = std::get<0>(obj.param); + DisableConvertOnConstPathTransformationValues testValues = std::get<1>(obj.param); + + std::ostringstream result; + result << + inputShape << "_" << + testValues.actual.precisionBeforeDequantization << "_" << + testValues.actual.dequantizationOnActivations << "_" << "_weights_" << + testValues.actual.weights.outPrecision << "_" << "{ " << + testValues.actual.weights.values[0] << " }_" << + testValues.actual.fakeQuantizeOnWeights << "_"; + return result.str(); + } +}; + +TEST_P(DisableConvertOnConstPathTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector shapes = { ngraph::Shape({ 1, 3, 72, 48 }) }; + +const std::vector testValues = { + // Actual & Transformed: + // Constant + // |FP32 Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // Parameter Constant FakeQuantize + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert Constant + // \FP32 /FP32 |FP32 /I8 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + { + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + { {128.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {128.f}, element::f32, {}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 1.f }, ngraph::element::f32}, + { 255ul, Shape({ 1, 1, 1, 1 }), { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + { {128.f}, element::f32, {}, false, 1ul, element::u8, true, {}, { "DISABLED_CONSTANT_FOLDING" } }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {128.f}, element::f32, {}, false, 1ul, element::i8, true, {}, { "DISABLED_CONSTANT_FOLDING" } }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 1.f }, ngraph::element::f32}, + { 255ul, Shape({ 1, 1, 1, 1 }), { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + ngraph::element::f32, + {} + } + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + DisableConvertOnConstPathTransformation, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + DisableConvertOnConstPathTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp index 462f57d7f3a638..d02d51f265efa0 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp +++ 
b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -12,7 +12,7 @@ #include #include -#include +#include #include "common_test_utils/ngraph_test_utils.hpp" @@ -75,7 +75,7 @@ class FakeQuantizeAndTwoOutputBranchesWithConvolutionTransformation : testValues.actual.fqOnWeights2); SimpleLowPrecisionTransformer transform; - transform.add(testValues.params); + transform.add(testValues.params); transform.add(testValues.params); transform.transform(actualFunction); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_on_weights_with_unsupported_child.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_on_weights_with_unsupported_child.cpp new file mode 100644 index 00000000000000..34e5bf397b5607 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_on_weights_with_unsupported_child.cpp @@ -0,0 +1,128 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" +#include "lpt_ngraph_functions/fake_quantize_on_weights_and_unsupported_child_function.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class FakeQuantizeOnWeightsWithUnsupportedChildTestValues { +public: + class Actual { + public: + std::shared_ptr weights; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + }; + + class Expected { + public: + std::shared_ptr weights; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + }; + + ngraph::pass::low_precision::LayerTransformation::Params params; + ngraph::element::Type precision; + Actual actual; + Expected expected; +}; + +typedef std::tuple< + ngraph::Shape, + FakeQuantizeOnWeightsWithUnsupportedChildTestValues> FakeQuantizeOnWeightsWithUnsupportedChildParams; + +class FakeQuantizeOnWeightsWithUnsupportedChild : + public LayerTransformation, + public testing::WithParamInterface { +public: + void SetUp() override { + const auto inputShape = std::get<0>(GetParam()); + const auto testValues = std::get<1>(GetParam()); + + actualFunction = ngraph::builder::subgraph::FakeQuantizeOnWeightsAndUnsupportedChildFunction::get( + inputShape, + testValues.precision, + testValues.actual.weights, + testValues.actual.fakeQuantizeOnWeights); + + SimpleLowPrecisionTransformer transform; + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = ngraph::builder::subgraph::FakeQuantizeOnWeightsAndUnsupportedChildFunction::get( + inputShape, + testValues.precision, + testValues.expected.weights, + testValues.expected.fakeQuantizeOnWeights); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + auto inputShape = std::get<0>(obj.param); + FakeQuantizeOnWeightsWithUnsupportedChildTestValues testValues = std::get<1>(obj.param); + + std::ostringstream result; + result << toString(testValues.params) << "_" << + inputShape << "_" << testValues.precision << testValues.actual.fakeQuantizeOnWeights; + return result.str(); + } +}; + +TEST_P(FakeQuantizeOnWeightsWithUnsupportedChild, CompareFunctions) { + 
actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector shapes = { + ngraph::Shape({ 1, 3, 72, 48 }), + ngraph::Shape({ 4, 3, 72, 48 }) +}; + +const std::vector testValues = { + { + LayerTransformation::createParamsU8I8(), + ngraph::element::f32, + { + op::Constant::create(ngraph::element::f32, ngraph::Shape{ 3, 1, 1, 1 }, std::vector{ 1.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -127.f }, { 127.f } } + }, + { + op::Constant::create(ngraph::element::f32, ngraph::Shape{ 3, 1, 1, 1 }, std::vector{ -126.f }), + {}, + } + }, + { + LayerTransformation::createParamsU8U8(), + ngraph::element::f32, + { + op::Constant::create(ngraph::element::f32, ngraph::Shape{ 3, 1, 1, 1 }, std::vector{ 1.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { 0.f }, { 254.f } } + }, + { + op::Constant::create(ngraph::element::f32, ngraph::Shape{ 3, 1, 1, 1 }, std::vector{ 1.f }), + {}, + } + }, +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + FakeQuantizeOnWeightsWithUnsupportedChild, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + FakeQuantizeOnWeightsWithUnsupportedChild::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_precision_selection_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_precision_selection_transformation.cpp index 05ffb0cc7ff5ff..2bf23ce03d5f57 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_precision_selection_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_precision_selection_transformation.cpp @@ -1,20 +1,18 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "layer_transformation.hpp" -#include -#include -#include +#include #include +#include #include -#include #include #include -#include +#include #include #include "common_test_utils/ngraph_test_utils.hpp" @@ -93,7 +91,7 @@ class FakeQuantizePrecisionSelectionTransformation : public LayerTransformation, SimpleLowPrecisionTransformer transform; transform.add(params); transform.add(precisionLimitedOperationParams); - transform.add(params); + transform.add(params); transform.add(params); transform.transform(actualFunction); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp index f05bf5ed7048f7..dc5ffb1babe37d 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp @@ -1,18 +1,17 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "layer_transformation.hpp" -#include #include #include #include +#include #include -#include -#include +#include #include "common_test_utils/ngraph_test_utils.hpp" @@ -75,7 +74,7 @@ class FakeQuantizeTransformation : public LayerTransformation, public testing::W fakeQuantizeOnData.actual); SimpleLowPrecisionTransformer transform; - transform.add(params); + 
transform.add(params); transform.transform(actualFunction); referenceFunction = ngraph::builder::subgraph::FakeQuantizeFunction::getReference( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp new file mode 100644 index 00000000000000..b6fc476a60b6a4 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp @@ -0,0 +1,216 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" + +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/constant.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class FakeQuantizeWithNotOptimalTransformationTestValues { +public: + class Values { + public: + builder::subgraph::FakeQuantizeOnDataWithConstant fqOnData; + builder::subgraph::DequantizationOperations::Convert convertOnData; + builder::subgraph::DequantizationOperations dequantizationOnData; + builder::subgraph::Constant constantOnWeights; + builder::subgraph::FakeQuantizeOnWeights fqOnWeights; + builder::subgraph::DequantizationOperations dequantizationOnWeights; + builder::subgraph::DequantizationOperations dequantizationAfter; + }; + low_precision::LayerTransformation::Params params; + Values actual; + Values expected; +}; + +inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeWithNotOptimalTransformationTestValues& testValue) { + return out << "_" << + testValue.actual.fqOnData << "_" << testValue.actual.fqOnWeights << + testValue.expected.fqOnData << "_" << testValue.expected.fqOnWeights; +} + +typedef std::tuple< + ngraph::element::Type, + ngraph::Shape, + bool, + FakeQuantizeWithNotOptimalTransformationTestValues> FakeQuantizeWithNotOptimalTransformationParams; + +class FakeQuantizeWithNotOptimalTransformation : + public LayerTransformation, + public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::Shape shape = std::get<1>(GetParam()); + const bool updatePrecision = std::get<2>(GetParam()); + const FakeQuantizeWithNotOptimalTransformationTestValues testValues = std::get<3>(GetParam()); + + const low_precision::LayerTransformation::Params params = low_precision::LayerTransformation::Params(testValues.params). 
+ setUpdatePrecisions(updatePrecision); + + actualFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + precision, + shape, + testValues.actual.fqOnData, + testValues.actual.convertOnData, + testValues.actual.dequantizationOnData, + testValues.actual.constantOnWeights, + testValues.actual.fqOnWeights, + {}, + testValues.actual.dequantizationOnWeights, + testValues.actual.dequantizationAfter); + + SimpleLowPrecisionTransformer transformer; + transformer.add( + low_precision::LayerTransformation::Params(params).setPrecisionsOnActivations({ element::u8 })); + transformer.add(params); + transformer.transform(actualFunction); + + referenceFunction = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + precision, + shape, + testValues.expected.fqOnData, + {}, + testValues.expected.dequantizationOnData, + testValues.expected.constantOnWeights, + testValues.expected.fqOnWeights, + {}, + testValues.expected.dequantizationOnWeights, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type precision; + ngraph::Shape shape; + bool updatePrecision; + FakeQuantizeWithNotOptimalTransformationTestValues fakeQuantizeOnData; + std::tie(precision, shape, updatePrecision, fakeQuantizeOnData) = obj.param; + + std::ostringstream result; + result << LayerTransformation::getTestCaseNameByParams(precision, shape, fakeQuantizeOnData.params) << + (updatePrecision ? "" : "_notUpdatePrecision_") << + fakeQuantizeOnData; + return result.str(); + } +}; + +TEST_P(FakeQuantizeWithNotOptimalTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector precisions = { + ngraph::element::f32, + //ngraph::element::i32, + //ngraph::element::f16 +}; + +const std::vector updatePrecisions = { true/*, false*/ }; + +const std::vector fakeQuantizeTransformationTestValues = { + // Actual: + // + // FakeQuantize + // |FP32 + // | + // Convert Constant + // |I8 |I8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant Constant + // \FP32 /FP32 |FP32 Constant Constant Constant Constant + // \ / | /FP32 /FP32 /FP32 /FP32 + // Multiply FakeQuantize + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // FakeQuantize Constant + // \U8 /U8 + // \ / + // Subtract Constant + // \FP32 /I8 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8AndI8(), + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 2.55f }, { -128.f }, { 127.f }, ngraph::element::i8 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.01f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + { + { ngraph::element::f32, false }, + { {127.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.03f}, ngraph::element::f32, {}, false } + }, + {} + }, + { + { 256ul, {{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }, {}, {}}, { 0.f }, { 2.55f }, { 0.f }, { 255.f }, ngraph::element::u8 }, + { ngraph::element::u8, false }, + {}, + {{5.f}, ngraph::element::i8}, + {}, + { + {}, + { std::vector(64, 127.f), ngraph::element::f32, {64, 1, 1, 1}, false, 1ul, ngraph::element::i8, false, {"DISABLED_CONSTANT_FOLDING"}}, + {} + }, + { + { }, + { }, + { 
{0.0003f}, ngraph::element::f32, {1}} + } + }, + } +}; + +const std::vector shapes = { + { 1, 32, 72, 48 }, + // TODO: 3D tensor +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + FakeQuantizeWithNotOptimalTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(updatePrecisions), + ::testing::ValuesIn(fakeQuantizeTransformationTestValues)), + FakeQuantizeWithNotOptimalTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dynamic_intervals_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dynamic_intervals_transformation.cpp index a83e0128e3a6bf..1de0abcf37baac 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dynamic_intervals_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_with_dynamic_intervals_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,12 +6,10 @@ #include #include -#include #include +#include #include - -#include #include #include "common_test_utils/ngraph_test_utils.hpp" diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fold_convert_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fold_convert_transformation.cpp new file mode 100644 index 00000000000000..24c34da3bcdb8e --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fold_convert_transformation.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" + +#include +#include "lpt_ngraph_functions/common/builders.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" + +using namespace testing; +using namespace ngraph::pass; +using namespace ngraph::builder::subgraph; + +class FoldConvertTransformationTestValues { +public: + ngraph::pass::low_precision::LayerTransformation::Params params; + ngraph::element::Type precision; + ngraph::Shape inputShape; + ngraph::builder::subgraph::DequantizationOperations dequantizationActual; + ngraph::builder::subgraph::DequantizationOperations dequantizationExpected; +}; + +class FoldConvertTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const FoldConvertTransformationTestValues testValues = GetParam(); + + const auto createFunction = []( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const ngraph::builder::subgraph::DequantizationOperations& dequantization) -> std::shared_ptr { + auto input = std::make_shared(precision, inputShape); + std::shared_ptr output = makeDequantization(input, dequantization); + output->set_friendly_name("output"); + + return std::make_shared( + ngraph::ResultVector{ std::make_shared(output) }, + ngraph::ParameterVector{ input }, + "FoldConvertTransformation"); + }; + actualFunction = createFunction(testValues.precision, testValues.inputShape, testValues.dequantizationActual); + + SimpleLowPrecisionTransformer transform; + transform.add( + 
low_precision::LayerTransformation::Params(testValues.params)); + transform.transform(actualFunction); + + referenceFunction = createFunction(testValues.precision, testValues.inputShape, testValues.dequantizationExpected); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const FoldConvertTransformationTestValues testValues = obj.param; + + std::ostringstream result; + result << + testValues.precision << "_" << + testValues.inputShape << "_" << + testValues.dequantizationActual << "_" << + testValues.dequantizationExpected; + return result.str(); + } +}; + +TEST_P(FoldConvertTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector testValues = { + // Actual: + // + // Parameter Constant + // |U8 |U8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // Parameter + // |U8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + { + LayerTransformation::createParamsU8I8(), + ngraph::element::f32, + ngraph::Shape{1, 4, 16, 16}, + { + {ngraph::element::f32}, + { {7.f}, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true }, + { 10.f } + }, + { + {ngraph::element::f32}, + { {7.f}, ngraph::element::f32, {}, false, 1 }, + { 10.f } + } + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + FoldConvertTransformation, + ::testing::ValuesIn(testValues), + FoldConvertTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fold_fake_quantize_in_transformations.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fold_fake_quantize_in_transformations.cpp index 4afe614a2f783d..750e1bad4a8295 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fold_fake_quantize_in_transformations.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fold_fake_quantize_in_transformations.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,12 +6,10 @@ #include #include -#include #include +#include #include - -#include #include #include "common_test_utils/ngraph_test_utils.hpp" diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_transformation.cpp index 1b0fe309f8cef5..a4a4a7b300f4ba 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -14,6 +14,7 @@ #include #include #include +#include #include "lpt_ngraph_functions/common/add.hpp" #include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" #include "lpt_ngraph_functions/common/dequantization_operations.hpp" @@ -74,6 +75,9 @@ class FuseFakeQuantizeTransformation : public LayerTransformation, public testin testValues.actual.fakeQuantizeOnData); SimpleLowPrecisionTransformer 
transformer; + transformer.add(testValues.params); + transformer.transform(actualFunction); + transformer.add(testValues.params); transformer.transform(actualFunction); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_below_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_below_transformation.cpp new file mode 100644 index 00000000000000..f96b0f1dbe3c9f --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_below_transformation.cpp @@ -0,0 +1,134 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/get_dequantization_function.hpp" +#include +#include "low_precision/network_helper.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class GetDequantizationBelowTestValues { +public: + builder::subgraph::FakeQuantizeOnData fakeQuantize; + builder::subgraph::DequantizationOperations dequantization; +}; + +inline std::ostream& operator<<(std::ostream& os, const std::vector& values) { + os << "{ "; + for (size_t i = 0; i < values.size(); ++i) { + os << values[i]; + if (i != (values.size() - 1ul)) { + os << ", "; + } + } + os << " }"; + return os; +} + +inline std::ostream& operator<<(std::ostream& out, const GetDequantizationBelowTestValues& testValue) { + return out << "_" << testValue.fakeQuantize << "_" << testValue.dequantization; +} + +typedef std::tuple< + ngraph::element::Type, + ngraph::Shape, + GetDequantizationBelowTestValues> GetDequantizationBelowParams; + +class GetDequantizationBelowTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::Shape shape = std::get<1>(GetParam()); + const GetDequantizationBelowTestValues testValues = std::get<2>(GetParam()); + + auto const function = ngraph::builder::subgraph::GetDequantizationFunction::get( + precision, + shape, + testValues.fakeQuantize, + testValues.dequantization); + + auto const fakeQuantize = function->get_parameters()[0]->output(0).get_target_inputs().begin()->get_node()->shared_from_this(); + auto dequantization = ngraph::pass::low_precision::NetworkHelper::getDequantizationBelow(fakeQuantize); + + actualFunction = ngraph::builder::subgraph::GetDequantizationFunction::get( + precision, + shape, + testValues.fakeQuantize, + dequantization); + + referenceFunction = ngraph::builder::subgraph::GetDequantizationFunction::get( + precision, + shape, + testValues.fakeQuantize, + testValues.dequantization); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type precision; + ngraph::Shape shape; + GetDequantizationBelowTestValues testValues; + std::tie(precision, shape, testValues) = obj.param; + + std::ostringstream result; + result << precision << "_" << shape << "_" << testValues; + return result.str(); + } +}; + +TEST_P(GetDequantizationBelowTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector precisions = { + ngraph::element::f32, +}; + 
+const std::vector testValues = { + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { ngraph::element::f32, {}, { 0.01f } } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { ngraph::element::f32, { 127.f }, { 0.01f } } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { + ngraph::element::f32, + {{ 127.f }, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + { 0.01f } + } + } +}; + +const std::vector shapes = { + { 1, 32, 72, 48 }, + // TODO: 3D tensor +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + GetDequantizationBelowTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + GetDequantizationBelowTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_test.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_test.cpp index d4d4eafced6f21..1086b5c4f2464d 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_test.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_test.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -28,7 +28,7 @@ typedef std::tuple< // mulDataInput size_t> GetDequantizationTestValues; -class GetDequantizationTransformation : public LayerTransformation, public testing::WithParamInterface { +class GetDequantizationTestTransformation : public LayerTransformation, public testing::WithParamInterface { public: void SetUp() override { bool isConvert; @@ -70,7 +70,7 @@ std::vector subDataInput = { 0ul, 1ul }; std::vector mulDataInput = { 0ul, 1ul }; -TEST_P(GetDequantizationTransformation, CompareFunctions) { +TEST_P(GetDequantizationTestTransformation, CompareFunctions) { InitNodeInfo().run_on_function(actualFunction); actualFunction->validate_nodes_and_infer_types(); @@ -78,11 +78,11 @@ TEST_P(GetDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -INSTANTIATE_TEST_CASE_P(smoke_LPT, GetDequantizationTransformation, +INSTANTIATE_TEST_CASE_P(smoke_LPT, GetDequantizationTestTransformation, ::testing::Combine( ::testing::ValuesIn(isConvert), ::testing::ValuesIn(isSubtract), ::testing::ValuesIn(subDataInput), ::testing::ValuesIn(mulDataInput)), - GetDequantizationTransformation::getTestCaseName); + GetDequantizationTestTransformation::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_transformation.cpp new file mode 100644 index 00000000000000..72e9a2b453dd8e --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/get_dequantization_transformation.cpp @@ -0,0 +1,165 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/get_dequantization_function.hpp" +#include +#include "low_precision/network_helper.hpp" + +using namespace testing; +using namespace ngraph; +using 
namespace ngraph::pass; + +class GetDequantizationTestValues { +public: + builder::subgraph::FakeQuantizeOnData fakeQuantize; + // actual dequantization to create nGraph function to run NetworkHelper::getDequantization + builder::subgraph::DequantizationOperations actualDequantization; + builder::subgraph::DequantizationOperations expectedDequantization; +}; + +inline std::ostream& operator<<(std::ostream& os, const std::vector& values) { + os << "{ "; + for (size_t i = 0; i < values.size(); ++i) { + os << values[i]; + if (i != (values.size() - 1ul)) { + os << ", "; + } + } + os << " }"; + return os; +} + +inline std::ostream& operator<<(std::ostream& out, const GetDequantizationTestValues& testValue) { + return out << "_" << testValue.fakeQuantize << "_" << testValue.actualDequantization; +} + +typedef std::tuple< + ngraph::element::Type, + ngraph::Shape, + GetDequantizationTestValues> GetDequantizationParams; + +class GetDequantizationTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::Shape shape = std::get<1>(GetParam()); + const GetDequantizationTestValues testValues = std::get<2>(GetParam()); + + actualFunction = ngraph::builder::subgraph::GetDequantizationFunction::get( + precision, + shape, + testValues.fakeQuantize, + testValues.actualDequantization); + + const auto output = actualFunction->get_output_op(0); + auto dequantization = ngraph::pass::low_precision::NetworkHelper::getDequantization(output); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type precision; + ngraph::Shape shape; + GetDequantizationTestValues testValues; + std::tie(precision, shape, testValues) = obj.param; + + std::ostringstream result; + result << precision << "_" << shape << "_" << testValues; + return result.str(); + } +}; + +TEST_P(GetDequantizationTransformation, CompareFunctions) { + const GetDequantizationTestValues testValues = std::get<2>(GetParam()); + + const auto output = actualFunction->get_output_op(0); + const ngraph::pass::low_precision::FakeQuantizeDequantization dequantization = ngraph::pass::low_precision::NetworkHelper::getDequantization(output); + ngraph::builder::subgraph::DequantizationOperations actualDequantization = toDequantizationOperations(dequantization); + actualDequantization.subtract.constantShapeIsDefined = testValues.expectedDequantization.subtract.constantShapeIsDefined; + actualDequantization.subtract.outPrecision = testValues.expectedDequantization.subtract.outPrecision; + actualDequantization.multiply.constantShapeIsDefined = testValues.expectedDequantization.multiply.constantShapeIsDefined; + actualDequantization.multiply.outPrecision = testValues.expectedDequantization.multiply.outPrecision; + ASSERT_TRUE(actualDequantization == testValues.expectedDequantization); +} + +const std::vector precisions = { + ngraph::element::f32, +}; + +const std::vector testValues = { + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { ngraph::element::f32, {}, { 0.01f } }, + { ngraph::element::f32, {}, { 0.01f } } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { ngraph::element::f32, { 127.f }, { 0.01f } }, + { ngraph::element::f32, { 127.f }, { 0.01f } } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { + ngraph::element::f32, + {{ 127.f }, ngraph::element::f32, {}, false, 1, 
ngraph::element::u8, true}, + {{ 0.1f }, ngraph::element::f32, {}, false, 1}, + }, + { + ngraph::element::f32, + {{ 127.f }, ngraph::element::f32, {}, false, 1, ngraph::element::u8, true}, + {{ 0.1f }, ngraph::element::f32, {}, false, 1}, + } + }, + { + // unexpected Subtract shape + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { + ngraph::element::f32, + {std::vector(12ul, 127.0), ngraph::element::f32, {1, 3, 2, 2}, false, 0, ngraph::element::u8, true}, + {{ 0.1f }, ngraph::element::f32, {}, false, 1}, + }, + { + {}, + {}, + {{ 0.1f }, ngraph::element::f32, {}, false, 1}, + } + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f }, ngraph::element::u8 }, + { + ngraph::element::f32, + {{ 127.f }, ngraph::element::f32, {}, false, 0, ngraph::element::u8, true}, + {{ 0.1f }, ngraph::element::f32, {}, false, 0}, + }, + { + ngraph::element::f32, + {{ 127.f }, ngraph::element::f32, {}, false, 0, ngraph::element::u8, true}, + {{ 0.1f }, ngraph::element::f32, {}, false, 0}, + } + } +}; + +const std::vector shapes = { + { 1, 1, 2, 2 }, + // TODO: 3D tensor +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + GetDequantizationTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + GetDequantizationTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp index eaba214a633634..29cd58c9733806 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -34,6 +34,7 @@ class GroupConvolutionTestValues { ngraph::builder::subgraph::DequantizationOperations dequantization; std::shared_ptr weights; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; }; class Expected { @@ -42,6 +43,7 @@ class GroupConvolutionTestValues { ngraph::builder::subgraph::DequantizationOperations dequantizationBefore; std::shared_ptr weights; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; ngraph::element::Type precisionAfterOperation; ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; ngraph::element::Type precisionAfterDequantization; @@ -60,20 +62,24 @@ class GroupConvolutionTransformation : public LayerTransformation, public testin void SetUp() override { const GroupConvolutionTestValues testValues = GetParam(); - actualFunction = ngraph::builder::subgraph::GroupConvolutionFunction::getOriginal( + actualFunction = ngraph::builder::subgraph::GroupConvolutionFunction::get( testValues.actual.precisionBeforeDequantization, testValues.inputShape, testValues.outputShape, testValues.group, testValues.actual.dequantization, testValues.actual.weights, - testValues.actual.fakeQuantizeOnWeights); + testValues.actual.fakeQuantizeOnWeights, + testValues.actual.dequantizationOnWeights, + ngraph::element::f32, + {}, + ngraph::element::f32); SimpleLowPrecisionTransformer transform; 
transform.add(testValues.params); transform.transform(actualFunction); - referenceFunction = ngraph::builder::subgraph::GroupConvolutionFunction::getReference( + referenceFunction = ngraph::builder::subgraph::GroupConvolutionFunction::get( testValues.expected.precisionBeforeDequantization, testValues.inputShape, testValues.outputShape, @@ -81,6 +87,7 @@ class GroupConvolutionTransformation : public LayerTransformation, public testin testValues.expected.dequantizationBefore, testValues.expected.weights, testValues.expected.fakeQuantizeOnWeights, + testValues.expected.dequantizationOnWeights, testValues.expected.precisionAfterOperation, testValues.expected.dequantizationAfter, testValues.expected.precisionAfterDequantization); @@ -121,7 +128,8 @@ const std::vector testValues = { ngraph::element::u8, {{ngraph::element::f32}, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -129,6 +137,7 @@ const std::vector testValues = { {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 24, 1, 1 }}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) } @@ -144,7 +153,8 @@ const std::vector testValues = { ngraph::element::u8, {{ngraph::element::f32}, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -152,6 +162,7 @@ const std::vector testValues = { {{ ngraph::element::f32 }, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {}, ngraph::element::f32, {} } @@ -167,7 +178,8 @@ const std::vector testValues = { ngraph::element::f32, {{}, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -175,6 +187,7 @@ const std::vector testValues = { {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 24, 1, 1 }}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) } @@ -194,7 +207,8 @@ const std::vector testValues = { {{ 0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f }, ngraph::element::f32, {1, 6, 1, 1}} }, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -202,6 +216,7 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, { {}, @@ -235,7 +250,8 @@ const std::vector testValues = { {{ 0.02f }, ngraph::element::f32, {1, 6, 1, 1}} 
}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {}, }, // ExpectedValues { @@ -243,6 +259,7 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, { {}, @@ -262,7 +279,8 @@ const std::vector testValues = { ngraph::element::f32, {{}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -270,6 +288,7 @@ const std::vector testValues = { {{}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {}, ngraph::element::f32, {} } @@ -285,7 +304,8 @@ const std::vector testValues = { ngraph::element::u8, {{element::f32}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -293,11 +313,12 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 24, 1, 1 }}} } }, - // depth-wise convolution, tensor quantization, with zero point + // depth-wise convolution, per-tensor quantization, with zero point { LayerTransformation::createParamsU8I8(), { 1, 6, 224, 224 }, @@ -308,7 +329,8 @@ const std::vector testValues = { ngraph::element::u8, {{ngraph::element::f32}, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -316,6 +338,7 @@ const std::vector testValues = { {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 6, 1, 1 }}} } @@ -331,7 +354,8 @@ const std::vector testValues = { ngraph::element::f32, {{}, { 128.f }, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -339,6 +363,7 @@ const std::vector testValues = { {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 6, 1, 1 }}} } @@ -358,7 +383,8 @@ const std::vector testValues = { {{ 0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f }, ngraph::element::f32, {1, 6, 1, 1}} }, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, 
Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -366,6 +392,7 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, { {}, @@ -381,7 +408,7 @@ const std::vector testValues = { }, } }, - // depth-wise convolution, per-channel quantization with the same values, without zero point + // depth-wise convolution, per-tensor quantization with the same values, without zero point { LayerTransformation::createParamsU8I8(), { 1, 6, 224, 224 }, @@ -396,7 +423,8 @@ const std::vector testValues = { {{ 0.02f }, ngraph::element::f32, {1, 6, 1, 1}} }, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -404,6 +432,7 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, { {}, @@ -423,7 +452,8 @@ const std::vector testValues = { ngraph::element::f32, {{}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -431,6 +461,7 @@ const std::vector testValues = { {{}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {}, ngraph::element::f32, {} } @@ -446,7 +477,8 @@ const std::vector testValues = { ngraph::element::u8, {{element::f32}, {}, { 0.02f }}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -454,6 +486,7 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), {}, + {}, ngraph::element::f32, {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 6, 1, 1 }}} } @@ -469,7 +502,8 @@ const std::vector testValues = { ngraph::element::f32, {}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} }, // ExpectedValues { @@ -477,10 +511,111 @@ const std::vector testValues = { {}, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {}, ngraph::element::f32, {} } }, + + // per-channel quantization with different values, without zero point + { + LayerTransformation::createParamsU8I8(), + { 1, 6, 224, 224 }, + { 1, 24, 218, 218 }, + 3ul, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {}, + {{ 0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f }, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + {}, + { ngraph::element::f32, {}, {0.01}} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, 
ngraph::Shape{}, std::vector{ 2.f }), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f + }, + ngraph::element::f32, {24, 1, 1} + } + }, + } + }, + + // per-channel quantization with different values, without zero point + { + LayerTransformation::createParamsU8I8(), + { 1, 6, 224, 224 }, + { 1, 24, 218, 218 }, + 3ul, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{ 0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f }, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + {}, + { + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + {}, + { + {}, + {std::vector(24ul, 127.f), ngraph::element::f32, {24, 1, 1, 1}, false, 1, ngraph::element::i8, false, {"DISABLED_CONSTANT_FOLDING"}}, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f, 0.0008f + }, + ngraph::element::f32, {24, 1, 1} + } + }, + } + } }; INSTANTIATE_TEST_CASE_P( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/is_function_quantized_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/is_function_quantized_transformation.cpp new file mode 100644 index 00000000000000..76582309ea8afd --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/is_function_quantized_transformation.cpp @@ -0,0 +1,116 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include +#include "lpt_ngraph_functions/common/builders.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class IsFunctionQuantizedTransformationValues { +public: + ngraph::Shape shape; + ngraph::element::Type precision; + builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantize; + bool constantSubgraphOnParameters; + bool inputOnParameters; + + bool isQuantized; +}; + +class IsFunctionQuantizedTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const auto testValues = GetParam(); + + const auto input = std::make_shared(testValues.precision, ngraph::Shape(testValues.shape)); + const auto fakeQuantize = ngraph::builder::subgraph::makeFakeQuantize( + input, + testValues.precision, + testValues.fakeQuantize, + testValues.constantSubgraphOnParameters); + + if 
(testValues.inputOnParameters) { + replace_node(fakeQuantize->get_input_node_shared_ptr(3), input); + } + + ngraph::ResultVector results{ std::make_shared(fakeQuantize) }; + function = std::make_shared(results, ngraph::ParameterVector{ input }, "IsFunctionQuantizedFunction"); + function->validate_nodes_and_infer_types(); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + IsFunctionQuantizedTransformationValues testValues = obj.param; + + std::ostringstream result; + result << + testValues.shape << "_" << + testValues.precision << "_" << + testValues.fakeQuantize << + testValues.constantSubgraphOnParameters << "_" << + testValues.inputOnParameters << "_" << + testValues.isQuantized; + return result.str(); + } + +protected: + std::shared_ptr function; +}; + +TEST_P(IsFunctionQuantizedTransformation, Run) { + const bool isQuantized = ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(function); + + const auto testValues = GetParam(); + ASSERT_EQ(testValues.isQuantized, isQuantized); +} + +const std::vector shapes = { ngraph::Shape({ 1, 3, 72, 48 }) }; + +const std::vector testValues = { + { + ngraph::Shape{1, 3, 9, 9}, + ngraph::element::f32, + { 255ul, {{ 1, 1, 1, 1 }}, { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + false, + false, + true + }, + { + ngraph::Shape{1, 3, 9, 9}, + ngraph::element::f32, + { 255ul, {{ 1, 1, 1, 1 }}, { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + true, + false, + false + }, + { + ngraph::Shape{1, 3, 9, 9}, + ngraph::element::f32, + { 255ul, {{ 1, 1, 1, 1 }}, { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + false, + true, + false + }, + { + ngraph::Shape{1, 3, 9, 9}, + ngraph::element::f32, + { 255ul, {{ 1, 1, 1, 1 }}, { -1.28f }, { 1.27f }, { -128.f }, { 127.f }, element::i8 }, + true, + true, + false + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + IsFunctionQuantizedTransformation, + ::testing::ValuesIn(testValues), + IsFunctionQuantizedTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.cpp index 572cce8a1aef7e..edd4649e265e49 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.cpp @@ -5,6 +5,7 @@ #include "layer_transformation.hpp" #include +#include #include "simple_low_precision_transformer.hpp" using namespace testing; @@ -76,3 +77,54 @@ std::string LayerTransformation::getTestCaseNameByParams( result << type << "_" << shape << "_" << toString(params); return result.str(); } + +ngraph::builder::subgraph::DequantizationOperations LayerTransformation::toDequantizationOperations( + const ngraph::pass::low_precision::FakeQuantizeDequantization& dequantization) { + const auto convert = dequantization.convert != nullptr ? + ngraph::builder::subgraph::DequantizationOperations::Convert(dequantization.convert->output(0).get_element_type()) : + ngraph::builder::subgraph::DequantizationOperations::Convert(); + + ngraph::builder::subgraph::DequantizationOperations::Subtract subtract; + { + const bool addDequantizationAttribute = dequantization.subtract != nullptr ? + dequantization.subtract->get_rt_info().count("DEQUANTIZATION") != 0 : + true; + + const size_t constantIndex = dequantization.subtractConstant && dequantization.subtract ? 
+ ngraph::pass::low_precision::NetworkHelper::getChildInputIndex( + dequantization.subtractConvert ? std::dynamic_pointer_cast(dequantization.subtractConvert) : dequantization.subtractConstant, + dequantization.subtract) : + 0ul; + + subtract = dequantization.subtractConstant != nullptr ? + ngraph::builder::subgraph::DequantizationOperations::Subtract( + dequantization.subtractConstant->cast_vector(), + dequantization.subtractConstant->output(0).get_element_type(), + dequantization.subtractConstant->output(0).get_shape(), + addDequantizationAttribute, + constantIndex) : + ngraph::builder::subgraph::DequantizationOperations::Subtract(); + } + + ngraph::builder::subgraph::DequantizationOperations::Multiply multiply; + { + const bool addDequantizationAttribute = dequantization.multiply != nullptr ? + dequantization.multiply->get_rt_info().count("DEQUANTIZATION") != 0 : + true; + + const size_t constantIndex = dequantization.multiplyConstant && dequantization.multiply ? + ngraph::pass::low_precision::NetworkHelper::getChildInputIndex(dequantization.multiplyConstant, dequantization.multiply) : + 0ul; + + multiply = dequantization.multiplyConstant != nullptr ? + ngraph::builder::subgraph::DequantizationOperations::Multiply( + dequantization.multiplyConstant->cast_vector(), + dequantization.multiplyConstant->output(0).get_element_type(), + dequantization.multiplyConstant->output(0).get_shape(), + addDequantizationAttribute, + constantIndex) : + ngraph::builder::subgraph::DequantizationOperations::Multiply(); + } + + return ngraph::builder::subgraph::DequantizationOperations(convert, subtract, multiply); +} diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.hpp b/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.hpp index 63d936377df821..743ec4091a71df 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.hpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/layer_transformation.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,7 @@ #include "low_precision/layer_transformation.hpp" #include "low_precision/transformation_context.hpp" #include "low_precision/transformer.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" typedef std::tuple< ngraph::element::Type, @@ -28,6 +29,9 @@ class LayerTransformation : public CommonTestUtils::TestsCommon { const ngraph::Shape& shape, const ngraph::pass::low_precision::LayerTransformation::Params& params); + static ngraph::builder::subgraph::DequantizationOperations toDequantizationOperations( + const ngraph::pass::low_precision::FakeQuantizeDequantization& dequantization); + protected: void transform(std::shared_ptr function); void transform( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_transformation.cpp index fa6cca4283f7b0..0592ad6731cc00 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -171,7 +171,7 @@ 
std::vector testValues = { // }, // U8 + I8 { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { ngraph::element::u8, { ngraph::element::f32, { 127.f }, { 0.02f } }, @@ -180,17 +180,17 @@ std::vector testValues = { }, { ngraph::element::u8, - { {}, {{127.f}, ngraph::element::f32, ngraph::Shape{ }, false}, {} }, + { ngraph::element::f32, { 127.f }, { 0.02f } }, ngraph::element::i8, - { }, + { ngraph::element::f32, {}, { 0.03f } }, ngraph::element::f32, ngraph::element::f32, - { {}, {}, { 0.0006f } }, + { {}, {}, {} }, } }, // I8 + I8 { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { ngraph::element::i8, { ngraph::element::f32, { 127.f }, { 0.02f } }, @@ -199,17 +199,17 @@ std::vector testValues = { }, { ngraph::element::i8, - { {}, {{127.f}, ngraph::element::f32, ngraph::Shape{ }, false}, {} }, + { ngraph::element::f32, { 127.f }, { 0.02f } }, ngraph::element::i8, - { }, + { ngraph::element::f32, {}, { 0.03f } }, ngraph::element::f32, ngraph::element::f32, - { {}, {}, { 0.0006f } }, + { {}, {}, {} }, } }, // U8 + I8, Subtract with not int { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { ngraph::element::u8, { ngraph::element::f32, { 127.5f }, { 0.02f } }, @@ -218,17 +218,17 @@ std::vector testValues = { }, { ngraph::element::u8, - { {}, {{128.f}, ngraph::element::f32, ngraph::Shape{ }, false}, {} }, + { ngraph::element::f32, { 127.5f }, { 0.02f } }, ngraph::element::i8, - { }, + { ngraph::element::f32, {}, { 0.03f } }, ngraph::element::f32, ngraph::element::f32, - { {}, {}, { 0.0006f } }, + { {}, {}, {} }, } }, // U8 + FP32 { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { ngraph::element::u8, { ngraph::element::f32, { 127.f }, { 0.02f } }, @@ -247,7 +247,7 @@ std::vector testValues = { }, // FP32 + I8 { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { ngraph::element::f32, { {}, { 127.f }, { 0.02f } }, @@ -264,24 +264,6 @@ std::vector testValues = { { }, } }, - { - LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), - { - ngraph::element::u8, - { ngraph::element::f32, { 127.f }, { 0.02f } }, - ngraph::element::i8, - { ngraph::element::f32, {}, { 0.03f } }, - }, - { - ngraph::element::u8, - { ngraph::element::f32, { 127.f }, { 0.02f } }, - ngraph::element::i8, - { ngraph::element::f32, {}, { 0.03f } }, - ngraph::element::f32, - ngraph::element::f32, - { }, - } - }, { LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(false), { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_with_constant_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_with_constant_transformation.cpp index f8d4a6a6c59843..a8c709ade90812 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_with_constant_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/mat_mul_with_constant_transformation.cpp @@ -177,7 +177,7 @@ 
std::vector testValues = { } }, - // not supported 3D: U8 & I8 + // 3D: U8 & I8 { LayerTransformation::createParamsU8I8(), { @@ -191,17 +191,72 @@ std::vector testValues = { { { 1, 3, 4 }, ngraph::element::u8, - { ngraph::element::f32, {}, { {0.01f, 0.02f, 0.03f} } }, + { {}, {}, {} }, ngraph::element::i8, {4, 4}, + std::vector(4 * 4, -126.f), + ngraph::element::f32, + { {}, {}, { {0.001f, 0.002f, 0.003f} } }, + {}, + } + }, + + // 3D: U8 & I8 + { + LayerTransformation::createParamsU8I8(), + { + { 1, 3, 4 }, + ngraph::element::u8, + { ngraph::element::f32, {}, { 0.02f } }, + { 4, 4 }, std::vector(4 * 4, 1.f), + { + 255, + { 1, 4 }, + {0.f, 0.f, 0.f, 0.f}, + {254.f, 254.f, 254.f, 254.f}, + {-127.f, -12.7f, -1.27f , -0.127f}, + {127.f, 12.7f, 1.27f , 0.127f}, + }, + }, + { + { 1, 3, 4 }, + ngraph::element::u8, + { {}, {}, {} }, + ngraph::element::i8, + { 4, 4 }, + std::vector(4 * 4, -126.f), ngraph::element::f32, + { {}, {}, {{ 0.02f, 0.002f, 0.0002f, 0.00002f }, ngraph::element::f32, ngraph::Shape{ 1, 1, 4 }}}, {}, + } + }, + + // 3D: U8 & I8: dequantization by columns in first input: can't be transformed + { + LayerTransformation::createParamsU8I8(), + { + { 1, 3, 4 }, + ngraph::element::u8, + { ngraph::element::f32, {}, { {0.01f, 0.02f, 0.03f, 0.01f}, ngraph::element::f32, ngraph::Shape{1, 1, 4} } }, + { 4, 4 }, + std::vector(4 * 4, 1.f), + { 255, { 1, 1 }, {0.f}, {254.f}, {-12.7f}, {12.7} }, + }, + { + { 1, 3, 4 }, + ngraph::element::u8, + { ngraph::element::f32, {}, { {0.01f, 0.02f, 0.03f, 0.01f}, ngraph::element::f32, ngraph::Shape{1, 1, 4} } }, + ngraph::element::f32, + { 4, 4 }, + std::vector(4 * 4, 1.f), + ngraph::element::f32, + {{}, {}, {}}, { 255, { 1, 1 }, {0.f}, {254.f}, {-12.7f}, {12.7} }, } }, - // not supported 3D: U8 & I8 + // U8 & I8: dequantization by rows in second input: can't be transformed { LayerTransformation::createParamsU8I8(), { @@ -215,28 +270,28 @@ std::vector testValues = { { 4, 1 }, {0.f, 0.f, 0.f, 0.f}, {254.f, 254.f, 254.f, 254.f}, - {-12.7f / 4.f, -12.7f / 3.f, -12.7f / 2.f, -12.7f}, - {12.7f / 4.f, 12.7f / 3.f, 12.7f / 2.f, 12.7f} + {-127.f, -12.7f, -1.27f , -0.127f}, + {127.f, 12.7f, 1.27f , 0.127f}, }, }, { { 1, 3, 4 }, ngraph::element::u8, { ngraph::element::f32, {}, { 0.02f } }, - ngraph::element::i8, - {4, 4}, + ngraph::element::f32, + { 4, 4 }, std::vector(4 * 4, 1.f), ngraph::element::f32, - {}, + { {}, {}, { 0.02f * 0.1f } }, { 255, { 4, 1 }, {0.f, 0.f, 0.f, 0.f}, {254.f, 254.f, 254.f, 254.f}, - {-12.7f / 4.f, -12.7f / 3.f, -12.7f / 2.f, -12.7f}, - {12.7f / 4.f, 12.7f / 3.f, 12.7f / 2.f, 12.7f} + {-127.f, -12.7f, -1.27f , -0.127f}, + {127.f, 12.7f, 1.27f , 0.127f}, }, - } + }, }, // 2D: U8 & I8 diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_to_group_convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_to_group_convolution_transformation.cpp index f51206b10425cc..f2a2d025e9562a 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_to_group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_to_group_convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -157,6 +157,31 @@ const std::vector testValues } } }, + // subtract + multiply + { + ngraph::Shape{ 1, 4, 1, 1 }, + LayerTransformation::createParamsU8I8(), + true, + false, 
+ { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{1.f, 2.f, 3.f, 4.f}, ngraph::element::f32, {1, 4, 1, 1}, true, 1, ngraph::element::u8, true}, + {{0.45f, 0.82f, 0.71f, 0.37f}} + } + }, + { + ngraph::element::u8, + std::make_shared(ngraph::element::i8, ngraph::Shape{4, 1, 1, 1, 1}, std::vector{1.f, 1.f, 1.f, 1.f}), + std::make_shared(ngraph::element::f32, ngraph::Shape{1, 4, 1, 1}, std::vector{-1.f, -2.f, -3.f, -4.f}), + { + {}, + {}, + {{0.45f, 0.82f, 0.71f, 0.37f}} + } + } + }, // without convert { ngraph::Shape{ 1, 4, 1, 1 }, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_transformation.cpp index 73a9bb09d12341..da4079c78c8f61 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/multiply_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -273,6 +273,36 @@ const std::vector multiplyTransformationTestVa } }, + // Actual: + // + // Parameter + // |I8 + // | + // Convert Constant Parameter + // \FP32 /FP32 |I8 + // \ / | + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Multiply + // Transformed: + // + // Parameter + // |I8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply Parameter + // \FP32 /I8 + // \ / + // Multiply { LayerTransformation::createParamsI8I8(), { @@ -307,6 +337,74 @@ const std::vector multiplyTransformationTestVa } }, + // Actual: + // + // Parameter Constant + // |I8 |I8 + // | | + // Convert Convert Parameter + // \FP32 /FP32 |I8 + // \ / | + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Multiply + // Transformed: + // + // Parameter + // |I8 + // | + // Convert Constant + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply Parameter + // \FP32 /I8 + // \ / + // Multiply + { + LayerTransformation::createParamsI8I8(), + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + { + ngraph::element::f32, + { {2.f}, ngraph::element::f32, {}, true, 1ul, ngraph::element::i8, true }, + { 10.f } + } + }, + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + {ngraph::element::f32, { }, { 7.f }} + }, + false + }, + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + {ngraph::element::f32, { 2.f }, { 70.f }}, + }, + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + {} + }, + false + } + }, + { LayerTransformation::createParamsI8I8(), { @@ -442,7 +540,190 @@ const std::vector multiplyTransformationTestVa }, true } - } + }, + + // Constant as input + { + LayerTransformation::createParamsU8I8(), + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + {ngraph::element::f32, { }, { 0.2f }}, + }, + { + {}, + {{ 7.f }, ngraph::element::i8}, // Constant as input + ngraph::element::i8, + {ngraph::element::f32, { }, { 0.5f }}, + }, + false + }, + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + {ngraph::element::f32, {}, {}}, + }, + { + {}, + {{ 0.7f }, ngraph::element::f32}, + ngraph::element::f32, + {} + }, + true + } + }, + + // Actual: + // + // Parameter Constant Constant Constant + // |I8 |I8 |I8 |I8 + 
// | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 |I8 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Multiply + // Transformed: + // + // Parameter Constant + // |I8 |I8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + // + { + LayerTransformation::createParamsU8I8(), + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + { 0.2f } + }, + }, + { + {}, + {{ 7.f }, ngraph::element::i8}, // Constant as input + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + { 0.5f } + }, + }, + false + }, + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + {} + }, + }, + { + {}, + {{ -12.f }, ngraph::element::f32}, + ngraph::element::f32, + {} + }, + true + } + }, + + // Actual: + // + // Constant Constant Parameter Constant + // |I8 |I8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 |I8 /FP32 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Multiply + // Transformed: + // + // Parameter Constant + // |I8 |I8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply + // + { + LayerTransformation::createParamsU8I8(), + { + { + {}, + {{ 7.f }, ngraph::element::i8}, // Constant as input + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + { 0.5f } + }, + }, + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + { 0.2f } + }, + }, + false + }, + { + { + { 1, 3, 8, 16 }, + {}, + ngraph::element::i8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, false, 1, ngraph::element::i8, true }, + {} + }, + }, + { + {}, + {{ -12.f }, ngraph::element::f32}, + ngraph::element::f32, + {} + }, + true + } + }, }; INSTANTIATE_TEST_CASE_P( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/separate_in_standalone_branch_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/separate_in_standalone_branch_transformation.cpp new file mode 100644 index 00000000000000..4d48966c46f318 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/separate_in_standalone_branch_transformation.cpp @@ -0,0 +1,165 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" +#include "lpt_ngraph_functions/mat_mul_function.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" + +#include "ngraph_functions/subgraph_builders.hpp" +#include "simple_low_precision_transformer.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" + +namespace { + +using namespace testing; +using namespace 
ngraph::pass; + +class SeparateInStandaloneBranchTransformationTestValues { +public: + ngraph::pass::low_precision::LayerTransformation::Params params; + ngraph::element::Type precisionBefore; + ngraph::builder::subgraph::DequantizationOperations dequantization; +}; + +inline std::ostream& operator << (std::ostream& out, const SeparateInStandaloneBranchTransformationTestValues& testValues) { + return out << "_" << testValues.dequantization; +} + +typedef std::tuple< + ngraph::Shape, + SeparateInStandaloneBranchTransformationTestValues> SeparateInStandaloneBranchTransformationParams; + +class SeparateInStandaloneBranchTransformation : + public LayerTransformation, + public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::Shape shape = std::get<0>(GetParam()); + const SeparateInStandaloneBranchTransformationTestValues testValues = std::get<1>(GetParam()); + + const auto createActualFunction = []( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const ngraph::builder::subgraph::DequantizationOperations& dequantizations) -> std::shared_ptr { + const std::shared_ptr input = std::make_shared(precision, inputShape); + const auto relu = std::make_shared(input); + const auto dequantizationsNode = ngraph::builder::subgraph::makeDequantization(relu, dequantizations); + + const std::shared_ptr reshape1 = std::make_shared( + dequantizationsNode, + std::make_shared(ngraph::element::i32, ngraph::Shape{ 2 }, std::vector({0, -1})), + true); + reshape1->set_friendly_name("reshape1"); + + const std::shared_ptr reshape2 = std::make_shared( + dequantizationsNode, + std::make_shared(ngraph::element::i32, ngraph::Shape{ 2 }, std::vector({0, -1})), + true); + reshape2->set_friendly_name("reshape2"); + + return std::make_shared( + ngraph::ResultVector{ + std::make_shared(reshape1), + std::make_shared(reshape2) + }, + std::vector> { input }, + "SeparateInStandaloneBranchTransformation"); + }; + actualFunction = createActualFunction(testValues.precisionBefore, shape, testValues.dequantization); + + const auto result = actualFunction->get_results()[0]; + ngraph::pass::low_precision::NetworkHelper::separateInStandaloneBranch(result->get_input_node_shared_ptr(0)); + + const auto createReferenceFunction = []( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const ngraph::builder::subgraph::DequantizationOperations& dequantization) -> std::shared_ptr { + const std::shared_ptr input = std::make_shared(precision, inputShape); + const auto relu = std::make_shared(input); + + const std::shared_ptr reshape1 = std::make_shared( + ngraph::builder::subgraph::makeDequantization(relu, dequantization), + std::make_shared(ngraph::element::i32, ngraph::Shape{ 2 }, std::vector({0, -1})), + true); + reshape1->set_friendly_name("reshape1"); + + const std::shared_ptr reshape2 = std::make_shared( + ngraph::builder::subgraph::makeDequantization(relu, dequantization), + std::make_shared(ngraph::element::i32, ngraph::Shape{ 2 }, std::vector({0, -1})), + true); + reshape2->set_friendly_name("reshape2"); + + return std::make_shared( + ngraph::ResultVector{ + std::make_shared(reshape1), + std::make_shared(reshape2) + }, + std::vector> { input }, + "SeparateInStandaloneBranchTransformation"); + }; + referenceFunction = createReferenceFunction(testValues.precisionBefore, shape, testValues.dequantization); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + ngraph::Shape shapes; + SeparateInStandaloneBranchTransformationTestValues 
testValues; + std::tie(shapes, testValues) = obj.param; + + std::stringstream ss; + ss << shapes << "_" << "_" << testValues; + return ss.str(); + } +}; + +TEST_P(SeparateInStandaloneBranchTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector shapes = { + { 1, 3, 9, 9 }, + { 4, 3, 9, 9 } +}; + +std::vector testValues = { + { + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + ngraph::element::u8, + { ngraph::element::f32, { 127.f }, { 0.02f } } + }, + { + LayerTransformation::createParamsU8U8().setSupportAsymmetricQuantization(true), + ngraph::element::u8, + { + ngraph::element::f32, + { {127.f}, ngraph::element::f32, {}, true, 1ul, ngraph::element::u8, true}, + { 0.02f } + } + } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + SeparateInStandaloneBranchTransformation, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + SeparateInStandaloneBranchTransformation::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/transformer_is_function_quantized.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/transformer_is_function_quantized.cpp index 768ed9eb7f1753..f9bd7b69aa3340 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/transformer_is_function_quantized.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/transformer_is_function_quantized.cpp @@ -6,12 +6,10 @@ #include #include -#include #include +#include #include - -#include #include #include diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp index 0ee715d736012e..3d4bf602567328 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -98,6 +98,30 @@ class TransposeTransformation : public LayerTransformation, public testing::With }; const std::vector testValues = { + // U8: per-tensor quantization + { + ngraph::Shape({ 1, 1000, 1, 1}), + { 0, 1, 3, 2}, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + { + {ngraph::element::f32}, + { {128}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true }, + {0.1f} + } + }, + { + ngraph::element::u8, + {{}, {}, {}}, + ngraph::element::u8, + { + {ngraph::element::f32}, + { {128}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true }, + {0.1f} + } + } + }, // U8: per-tensor quantization { ngraph::Shape({ 1, 1000, 1, 1}), diff --git a/inference-engine/tests/functional/inference_engine/ngraph_reader/mvn_tests.cpp b/inference-engine/tests/functional/inference_engine/ngraph_reader/mvn_tests.cpp index 0453d7190389c3..7a11642b564e51 100644 --- a/inference-engine/tests/functional/inference_engine/ngraph_reader/mvn_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/ngraph_reader/mvn_tests.cpp @@ -96,3 +96,174 @@ TEST_F(NGraphReaderTests, ReadMVNNetwork) { compareIRs(model, modelV5, 0); } + 
+TEST_F(NGraphReaderTests, ReadMVN6Network) { + std::string model = R"V0G0N( + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + 3 + + + + + + + + 1 + 3 + 22 + 22 + + + 3 + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + + +)V0G0N"; + std::string modelV5 = R"V0G0N( + + + + + + 1 + 3 + 22 + 22 + + + + + + + 3 + + + + + + + + + + + 1 + 3 + 22 + 22 + + + 3 + + + + + 1 + 3 + 1 + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + + + 1 + 3 + 1 + 1 + + + + + + + + 1 + 3 + 22 + 22 + + + 1 + 3 + 1 + 1 + + + + + 1 + 3 + 22 + 22 + + + + + + + + + + + + +)V0G0N"; + + compareIRs(model, modelV5, 24, [](Blob::Ptr& weights) { + auto* buffer = weights->buffer().as(); + buffer[0] = 0; + buffer[1] = 2; + buffer[2] = 3; + }); +} diff --git a/inference-engine/tests/functional/inference_engine/ngraph_reader/tensor_names.cpp b/inference-engine/tests/functional/inference_engine/ngraph_reader/tensor_names.cpp new file mode 100644 index 00000000000000..b3e9dfae4fb131 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/ngraph_reader/tensor_names.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "ngraph_reader_tests.hpp" + +TEST_F(NGraphReaderTests, ReadNetworkWithTensorNames) { + std::string model = R"V0G0N( + + + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + +)V0G0N"; + Core ie; + Blob::Ptr weights; + + auto network = ie.ReadNetwork(model, weights); + auto function = network.getFunction(); + auto inputs = network.getInputsInfo(); + auto outputs = network.getOutputsInfo(); + std::unordered_set inNames; + for (const auto& in : inputs) + inNames.emplace(in.first); + std::unordered_set outNames; + for (const auto& out : outputs) + outNames.emplace(out.first); + + ASSERT_EQ(1, inputs.size()); + ASSERT_EQ(1, outputs.size()); + ASSERT_EQ(1, function->get_results().size()); + + for (const auto& param : function->get_parameters()) { + ASSERT_TRUE(inNames.count(network.getOVNameForOperation(param->get_friendly_name()))); + ASSERT_TRUE(!param->get_output_tensor(0).get_names().empty()); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_TRUE(inNames.count(network.getOVNameForTensor(name))); + } + + for (const auto& result : function->get_results()) { + ASSERT_TRUE(outNames.count(network.getOVNameForOperation(result->get_friendly_name()))); + ASSERT_TRUE(!result->get_input_tensor(0).get_names().empty()); + for (const auto& name : result->get_input_tensor(0).get_names()) + ASSERT_TRUE(outNames.count(network.getOVNameForTensor(name))); + } + ASSERT_NO_THROW(network.getOVNameForTensor("relu,t")); +} diff --git a/inference-engine/tests/functional/inference_engine/parameter_tests.cpp b/inference-engine/tests/functional/inference_engine/parameter_tests.cpp index fc7ba4f6d5b89d..7146df5a9f8d92 100644 --- a/inference-engine/tests/functional/inference_engine/parameter_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/parameter_tests.cpp @@ -51,7 +51,11 @@ TEST_F(ParameterTests, ParameterAsInt) { TEST_F(ParameterTests, ParameterAsUInt) { Parameter p = 4u; ASSERT_TRUE(p.is()); +#ifdef __i386__ + ASSERT_TRUE(p.is()); +#else ASSERT_FALSE(p.is()); +#endif unsigned int test = p; ASSERT_EQ(4, test); } diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution.cpp new file mode 
100644 index 00000000000000..992518e9a8ae48 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/convolution.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +TEST_P(ConvolutionLayerTest, Serialize) { + Serialize(); +} + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16, + InferenceEngine::Precision::I16, InferenceEngine::Precision::I32, + InferenceEngine::Precision::I64}; +const std::vector> kernels = {{3, 5}}; +const std::vector> strides = {{1, 3}}; +const std::vector> padBegins = {{0, 3}}; +const std::vector> padEnds = {{0, 3}}; +const std::vector> dilations = {{3, 1}}; +const std::vector numOutChannels = {5}; + +const auto conv2DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); +const auto conv2DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution2D_Serialization_ExplicitPadding, ConvolutionLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution2D__Serialization_AutoPadValid, ConvolutionLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/group_convolution.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/group_convolution.cpp new file mode 100644 index 00000000000000..62438e7da90ea4 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/group_convolution.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/group_convolution.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +TEST_P(GroupConvolutionLayerTest, Serialize) { + Serialize(); +} + +const std::vector precisions = { + InferenceEngine::Precision::FP64, InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, 
InferenceEngine::Precision::BF16, + InferenceEngine::Precision::I8, InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, InferenceEngine::Precision::I64, + InferenceEngine::Precision::U8, InferenceEngine::Precision::U16, + InferenceEngine::Precision::U32, InferenceEngine::Precision::U64, +}; +const std::vector> kernels = {{3, 3}}; +const std::vector> strides = {{1, 1}}; +const std::vector> padBegins = {{0, 0}}; +const std::vector> padEnds = {{0, 0}}; +const std::vector> dilations = {{1, 1}}; +const std::vector numOutChannels = {8, 16}; +const std::vector numGroups = {2, 8}; +const std::vector pad_types = { + ngraph::op::PadType::EXPLICIT, ngraph::op::PadType::VALID, + ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER}; +const auto inputShapes = std::vector({1, 16, 30, 30}); + +const auto groupConv2DParams = ::testing::Combine( + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), ::testing::ValuesIn(pad_types)); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution2D_Serialization, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv2DParams, ::testing::ValuesIn(precisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(inputShapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/non_max_suppression.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/non_max_suppression.cpp new file mode 100644 index 00000000000000..21c0ffa6f3c0fc --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/non_max_suppression.cpp @@ -0,0 +1,58 @@ + +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/non_max_suppression.hpp" + +using namespace ngraph; +using namespace LayerTestsDefinitions; + +namespace { + TEST_P(NmsLayerTest, Serialize) { + Serialize(); + } + + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 + }; + + /* ============= NO MAX SUPPRESSION ============= */ + + const std::vector inShapeParams = { + InputShapeParams{3, 100, 5}, + InputShapeParams{1, 10, 50}, + InputShapeParams{2, 50, 50} + }; + + const std::vector maxOutBoxPerClass = {5, 20}; + const std::vector threshold = {0.3f, 0.7f}; + const std::vector sigmaThreshold = {0.0f, 0.5f}; + const std::vector encodType = {op::v5::NonMaxSuppression::BoxEncodingType::CENTER, + op::v5::NonMaxSuppression::BoxEncodingType::CORNER}; + const std::vector sortResDesc = {true, false}; + const std::vector outType = {element::i32, element::i64}; + + const auto inPrecisions = ::testing::Combine( + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(InferenceEngine::Precision::FP32)); + + const auto nmsParams = ::testing::Combine( + ::testing::ValuesIn(inShapeParams), + inPrecisions, + ::testing::ValuesIn(maxOutBoxPerClass), + ::testing::ValuesIn(threshold), // IOU threshold + ::testing::ValuesIn(threshold), 
// Score threshold + ::testing::ValuesIn(sigmaThreshold), + ::testing::ValuesIn(encodType), + ::testing::ValuesIn(sortResDesc), + ::testing::ValuesIn(outType), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + + INSTANTIATE_TEST_CASE_P(smoke_NmsLayerTest, NmsLayerTest, nmsParams, NmsLayerTest::getTestCaseName); +} // namespace + diff --git a/inference-engine/tests/functional/inference_engine/transformations/compare_functions_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/compare_functions_test.cpp index e82980c3db3a4c..337daa3a56403d 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/compare_functions_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/compare_functions_test.cpp @@ -4,6 +4,8 @@ #include +#include + #include "common_test_utils/test_common.hpp" #include #include @@ -208,4 +210,315 @@ TEST(TransformationTests, CompareFunctoinsTINegative) { auto res = compare_functions(f, f_ref); EXPECT_FALSE(res.first); EXPECT_EQ(res.second, "LSTMCell/4 != Relu/0"); -} \ No newline at end of file +} + +TEST(TransformationTests, ConstantNegativeDifferentElementType) { + const auto createConstantFunc = [](ngraph::element::Type t) { + using namespace ngraph::opset5; + auto constant = Constant::create(t, ngraph::Shape{1}, {1.1}); + + return std::make_shared( + ngraph::NodeVector{constant}, ngraph::ParameterVector{}); + }; + + const auto& f1 = createConstantFunc(ngraph::element::f64); + const auto& f2 = createConstantFunc(ngraph::element::f32); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'element_type' : [f64] vs [f32]")); +} + +TEST(TransformationTests, ConstantNegativeDifferentValues) { + const auto createConstantFunc = [](double value) { + using namespace ngraph::opset5; + auto constant = Constant::create(ngraph::element::f32, ngraph::Shape{1}, {value}); + + return std::make_shared( + ngraph::NodeVector{constant}, ngraph::ParameterVector{}); + }; + + const auto& f1 = createConstantFunc(1.0); + const auto& f2 = createConstantFunc(10.0); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'value' : look in to the mem buffer")); +} + +TEST(TransformationTests, ConstantNegativeDifferentShapes) { + const auto createConstantFunc = [](const ngraph::Shape& s) { + using namespace ngraph::opset5; + auto constant = Constant::create(ngraph::element::f32, s, {1.1}); + + return std::make_shared( + ngraph::NodeVector{constant}, ngraph::ParameterVector{}); + }; + + const auto& f1 = createConstantFunc(ngraph::Shape{2}); + const auto& f2 = createConstantFunc(ngraph::Shape{2, 2}); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'shape' : [2] vs [2, 2]")); +} + +TEST(TransformationTests, ClampNegativeDifferentMin) { + const auto createClampFunc = [](double min) { + using namespace ngraph::opset5; + auto constant = Constant::create(ngraph::element::f32, ngraph::Shape{1}, {1.0}); + auto clamp = std::make_shared(constant, min, 20.); + + return std::make_shared( + ngraph::NodeVector{clamp}, ngraph::ParameterVector{}); + 
}; + + const auto& f1 = createClampFunc(1.0); + const auto& f2 = createClampFunc(11.0); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'min' ")); +} + +TEST(TransformationTests, ClampNegativeDifferentMax) { + const auto createClampFunc = [](double max) { + using namespace ngraph::opset5; + auto constant = Constant::create(ngraph::element::f32, ngraph::Shape{1}, {1.0}); + auto clamp = std::make_shared<Clamp>(constant, 1., max); + + return std::make_shared<ngraph::Function>( + ngraph::NodeVector{clamp}, ngraph::ParameterVector{}); + }; + + const auto& f1 = createClampFunc(10.1); + const auto& f2 = createClampFunc(101.1); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'max' ")); +} + +TEST(TransformationTests, ConcatNegativeDifferentMax) { + const auto createConcatFunc = [](int64_t axis) { + using namespace ngraph::opset5; + auto constant = + Constant::create(ngraph::element::f32, ngraph::Shape{10, 10, 2, 2, 3}, {1.0}); + auto clamp = std::make_shared<Concat>(ngraph::OutputVector{constant}, axis); + + return std::make_shared<ngraph::Function>( + ngraph::NodeVector{clamp}, ngraph::ParameterVector{}); + }; + + const auto& f1 = createConcatFunc(1); + const auto& f2 = createConcatFunc(2); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'axis' : [1] vs [2]")); +} + +TEST(TransformationTests, GreaterNegativeDifferentMax) { + const auto createGreaterFunc = [](ngraph::op::AutoBroadcastType t) { + using namespace ngraph::opset5; + + auto input1 = std::make_shared<Parameter>(ngraph::element::f16, ngraph::Shape{15, 20, 3}); + auto input2 = std::make_shared<Parameter>(ngraph::element::f16, ngraph::Shape{15, 20, 3}); + auto node = std::make_shared<Greater>(input1, input2, t); + + return std::make_shared<ngraph::Function>(OutputVector{node}, ParameterVector{input1, input2}); + }; + + const auto& f1 = createGreaterFunc(ngraph::op::AutoBroadcastType::NUMPY); + const auto& f2 = createGreaterFunc(ngraph::op::AutoBroadcastType::PDPD); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'auto_broadcast' : [numpy] vs [pdpd]")); +} + +TEST(TransformationTests, ReadValueNegativeDifferentMax) { + const auto createReadValueFunc = [](const std::string& variable_id) { + using namespace ngraph::opset5; + + auto input1 = std::make_shared<Parameter>(ngraph::element::f16, ngraph::Shape{15, 20, 3}); + auto node = std::make_shared<ReadValue>(input1, variable_id); + + return std::make_shared<ngraph::Function>(OutputVector{node}, ParameterVector{input1}); + }; + + const auto& f1 = createReadValueFunc("10"); + const auto& f2 = createReadValueFunc("20"); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr("mismatch in value: 'variable_id' : [10] vs [20]")); +} + +TEST(TransformationTests, ReorgYoloNegativeDifferentMax) { + const auto createReorgYoloFunc = [](const Strides& stride) { + using namespace ngraph::opset5; + + auto param = + 
std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{10, 10, 10, 10}); + auto reorg_yolo = std::make_shared<ReorgYolo>(param, stride); + + return std::make_shared<ngraph::Function>( + std::make_shared<Result>(reorg_yolo), ngraph::ParameterVector{param}); + }; + + const auto& f1 = createReorgYoloFunc({1, 2}); + const auto& f2 = createReorgYoloFunc({2, 2}); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'stride' : [1, 2] vs [2, 2]")); +} + +namespace { + +template <typename Member> +class DummyConstant : public ngraph::op::Op { +public: + DummyConstant() = default; + + DummyConstant(const Member& member) + : m_element_type(element::Type_t::u8), m_shape({1, 1}), m_member(member) { + constructor_validate_and_infer_types(); + } + + DummyConstant(const DummyConstant& o) + : m_element_type(o.m_element_type), m_shape(o.m_shape), m_member(o.m_member) { + constructor_validate_and_infer_types(); + } + + DummyConstant& operator=(const DummyConstant&) = delete; + + const NodeTypeInfo& get_type_info() const override { + static const NodeTypeInfo type_info{typeid(this).name(), 0}; + return type_info; + } + + void validate_and_infer_types() override { + set_output_type(0, m_element_type, m_shape); // !!?? + } + + bool visit_attributes(AttributeVisitor& visitor) override { + visitor.on_attribute("member", m_member); + return true; + } + + bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override { + return true; + } + + // Don't constant fold a constant; it would make a copy + bool constant_fold(OutputVector& outputs, const OutputVector& inputs) override { + return false; + } + + std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared<DummyConstant>(*this); + } + +protected: + element::Type m_element_type{element::Type_t::i64}; + Shape m_shape{1, 1}; + Member m_member{}; +}; + +template <typename Member> +std::shared_ptr<ngraph::Function> createDummyFunc(const Member& m) { + auto constant = std::make_shared<DummyConstant<Member>>(m); + + return std::make_shared<ngraph::Function>( + ngraph::NodeVector{constant}, ngraph::ParameterVector{}); +} + +} // namespace + +TEST(TransformationTests, DummyOpNegativeDifferentElementType) { + const auto& f1 = createDummyFunc(element::Type_t::i64); + const auto& f2 = createDummyFunc(element::Type_t::f64); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'member' : [i64] vs [f64]")); +} + +TEST(TransformationTests, DummyOpNegativeDifferentIntVector) { + const auto& f1 = createDummyFunc(std::vector{1, 2, 3}); + const auto& f2 = createDummyFunc(std::vector{3, 2, 1}); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'member' : [1, 2, 3] vs [3, 2, 1]")); +} + +TEST(TransformationTests, DummyOpNegativeDifferentFloatVector) { + const auto& f1 = createDummyFunc(std::vector{1., 2., 3.}); + const auto& f2 = createDummyFunc(std::vector{3., 2., 1.}); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'member' : [1, 2, 3] vs 
[3, 2, 1]")); +} + +TEST(TransformationTests, DummyOpNegativeDifferentStringVector) { + const auto& f1 = createDummyFunc(std::vector{"a", "ba"}); + const auto& f2 = createDummyFunc(std::vector{"b", "ab"}); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" mismatch in value: 'member' : [a, ba] vs [b, ab]")); +} + +namespace ngraph { + +struct TestDummyDataTypeTransformationTests_NO_NGRAPH_NAME_COLISION {}; + +template <> +class AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(TestDummyDataTypeTransformationTests_NO_NGRAPH_NAME_COLISION& value) + : DirectValueAccessor(value) { + } + + static constexpr DiscreteTypeInfo type_info{ + "TestDummyDataTypeTransformationTests_NO_NGRAPH_NAME_COLISION", 0}; + + const DiscreteTypeInfo& get_type_info() const override { + return type_info; + } +}; + +constexpr DiscreteTypeInfo + AttributeAdapter::type_info; + +} // namespace ngraph + +TEST(TransformationTests, DummyOpNegativeNotSupportedType) { + TestDummyDataTypeTransformationTests_NO_NGRAPH_NAME_COLISION m{}; + const auto& f1 = createDummyFunc(m); + const auto& f2 = createDummyFunc(m); + + const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const auto res = fc.compare(f1, f2); + EXPECT_FALSE(res.valid); + EXPECT_THAT(res.message, HasSubstr(" [drop `void` comparison which is '")); +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp new file mode 100644 index 00000000000000..25e2bf481e4666 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "common_test_utils/common_utils.hpp" + +using FakeQuantizeDecompositionBasicParams = std::tuple; + +using FakeQuantizeDecompositionParamsSet = std::tuple, // il and ih values + bool // should be decompos +>; + +class FakeQuantizeDecompositionTest : public CommonTestUtils::TestsCommon, public ::testing::WithParamInterface { +public: + static std::string getTestCaseName(::testing::TestParamInfo obj) { + FakeQuantizeDecompositionBasicParams basic_params; + std::pair input_ranges_values; + bool should_be_decompos; + std::tie(basic_params, input_ranges_values, should_be_decompos) = obj.param; + + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + ngraph::element::Type_t data_prec, ranges_prec; + size_t levels; + std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params; + + std::ostringstream result; + result << "DATA=" << CommonTestUtils::vec2str(data_shape) << "_"; + result << "DATA_PRC=" << ngraph::element::Type(data_prec) << "_"; + result << "IL=" << CommonTestUtils::vec2str(il_shape) << "_" << input_ranges_values.first << "_"; + result << "IH=" << CommonTestUtils::vec2str(ih_shape) << "_" << input_ranges_values.second << "_"; + result << "OL=" << CommonTestUtils::vec2str(ol_shape) << "_"; + result << "OH=" << CommonTestUtils::vec2str(oh_shape) << "_"; + result << "RANGES_PRC=" << ngraph::element::Type(ranges_prec) << "_"; + 
result << "LEVELS=" << levels; + return result.str(); + } + +protected: + void SetUp() { + FakeQuantizeDecompositionBasicParams basic_params; + std::pair input_ranges_values; + bool should_be_decompos; + std::tie(basic_params, input_ranges_values, should_be_decompos) = this->GetParam(); + + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + ngraph::element::Type_t data_prec, ranges_prec; + size_t levels; + std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params; + + bool need_convert = data_prec != ranges_prec; + + std::shared_ptr f(nullptr), f_ref(nullptr); + { + const auto data = std::make_shared(data_prec, ngraph::PartialShape(data_shape)); + const auto il = std::make_shared(ranges_prec, il_shape, input_ranges_values.first); + const auto ih = std::make_shared(ranges_prec, ih_shape, input_ranges_values.second); + const auto ol = std::make_shared(ranges_prec, ol_shape); + const auto oh = std::make_shared(ranges_prec, oh_shape); + + const auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f = std::make_shared(ngraph::NodeVector{fq}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input_data = std::make_shared(data_prec, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(input_data); + std::shared_ptr data = input_data; + const auto il = std::make_shared(ranges_prec, il_shape, input_ranges_values.first); + const auto ih = std::make_shared(ranges_prec, ih_shape, input_ranges_values.second); + const auto ol = std::make_shared(ranges_prec, ol_shape); + const auto oh = std::make_shared(ranges_prec, oh_shape); + + if (should_be_decompos) { + if (need_convert) { + data = std::make_shared(data, ranges_prec); + } + + const auto max = std::make_shared(data, il); + const auto min = std::make_shared(max, ih); + + const auto levels_minus_one = std::make_shared(ranges_prec, ngraph::Shape{}, levels - 1); + + const auto sub_in_high_low = std::make_shared(ih, il); + const auto isc = std::make_shared(levels_minus_one, sub_in_high_low); + const auto ish = std::make_shared(il, isc); + + const auto after_isc_apply = std::make_shared(min, isc); + const auto after_ish_apply = std::make_shared(after_isc_apply, ish); + + const auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + + const auto sub_out_high_low = std::make_shared(oh, ol); + const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); + + const auto after_osc_apply = std::make_shared(round, osc); + const auto after_out_low_add = std::make_shared(after_osc_apply, ol); + std::shared_ptr result = after_out_low_add; + + if (need_convert) { + result = std::make_shared(result, data_prec); + } + + f_ref = std::make_shared(ngraph::NodeVector{result}, params); + } else { + const auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f_ref = std::make_shared(ngraph::NodeVector{fq}, params); + } + } + + const auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +}; + +TEST_P(FakeQuantizeDecompositionTest, CompareFunctions) {} + +const std::vector precisions = {ngraph::element::Type_t::f16, ngraph::element::Type_t::f32}; + +const std::vector levels = {16, 255, 256}; + +const std::vector> input_ranges_supported = { + {-10.0f, 10.f} +}; + +const auto simple_fq_basic = 
::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::ValuesIn(levels)); + +const auto broadcast_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{1, 3, 4, 1}), + ::testing::Values(ngraph::Shape{1, 1, 4, 5}), + ::testing::Values(ngraph::Shape{1, 1, 1, 1}), + ::testing::Values(ngraph::Shape{1, 1, 1, 1}), + ::testing::ValuesIn(levels)); + +const auto elementwise_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::Values(ngraph::Shape{2, 3, 4, 1}), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(levels)); + +const auto broadcast_6D_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5, 6, 7}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 1, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 4, 5, 1, 1}), + ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ::testing::ValuesIn(levels)); + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + simple_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + elementwise_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(FakeQuantize6D_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_6D_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +const std::vector> input_ranges_unsupported = { + {10.0f, -10.f}, + {5.0f, 5.0f}, + {-5.0f, -5.0f} +}; + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + simple_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + elementwise_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + 
+INSTANTIATE_TEST_CASE_P(FakeQuantize6D_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_6D_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/transformations/hswish_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/hswish_fusion_test.cpp index b360595e582594..ac8542b10a8057 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/hswish_fusion_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/hswish_fusion_test.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -21,15 +21,15 @@ using namespace testing; TEST(TransformationTests, HSwishFusionWithReluDivF16) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(relu, min_constant); - auto mul = std::make_shared(input, min); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto div = std::make_shared(mul, div_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(relu, min_constant); + auto mul = std::make_shared(input, min); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto div = std::make_shared(mul, div_constant); f = std::make_shared(ngraph::NodeVector{div}, ngraph::ParameterVector{input}); @@ -41,8 +41,8 @@ TEST(TransformationTests, HSwishFusionWithReluDivF16) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto hswish = std::make_shared(input); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); } @@ -54,15 +54,15 @@ TEST(TransformationTests, HSwishFusionWithReluDivF16) { TEST(TransformationTests, HSwishFusionWithReluDivF32) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{}); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(relu, min_constant); - auto mul = std::make_shared(input, min); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {6.0}); - auto div = std::make_shared(mul, div_constant); + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{}); + auto 
add_constant = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(relu, min_constant); + auto mul = std::make_shared(input, min); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{}, {6.0}); + auto div = std::make_shared(mul, div_constant); f = std::make_shared(ngraph::NodeVector{div}, ngraph::ParameterVector{input}); @@ -74,8 +74,8 @@ TEST(TransformationTests, HSwishFusionWithReluDivF32) { } { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{}); - auto hswish = std::make_shared(input); + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{}); + auto hswish = std::make_shared(input); f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); } @@ -87,15 +87,15 @@ TEST(TransformationTests, HSwishFusionWithReluDivF32) { TEST(TransformationTests, HSwishFusionWithReluMul) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(relu, min_constant); - auto mul_first = std::make_shared(input, min); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.1666666716}); - auto mul_second = std::make_shared(mul_first, mul_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(relu, min_constant); + auto mul_first = std::make_shared(input, min); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.1666666716}); + auto mul_second = std::make_shared(mul_first, mul_constant); f = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); @@ -107,8 +107,8 @@ TEST(TransformationTests, HSwishFusionWithReluMul) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto hswish = std::make_shared(input); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); } @@ -120,16 +120,16 @@ TEST(TransformationTests, HSwishFusionWithReluMul) { TEST(TransformationTests, HSwishFusionWithoutRelu) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto max_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); - auto max = std::make_shared(add, 
max_constant); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(max, min_constant); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto div = std::make_shared(min, div_constant); - auto mul = std::make_shared(input, div); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto max_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); + auto max = std::make_shared(add, max_constant); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(max, min_constant); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto div = std::make_shared(min, div_constant); + auto mul = std::make_shared(input, div); f = std::make_shared(ngraph::NodeVector{mul}, ngraph::ParameterVector{input}); @@ -141,8 +141,8 @@ TEST(TransformationTests, HSwishFusionWithoutRelu) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto hswish = std::make_shared(input); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); } @@ -151,29 +151,60 @@ TEST(TransformationTests, HSwishFusionWithoutRelu) { ASSERT_TRUE(res.first) << res.second; } -TEST(TransformationTests, HSwishFusionWithClamp) { +TEST(TransformationTests, HSwishFusionWithClampMul) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto clamp = std::make_shared(add, 0.0f, 6.0f); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {1.0 / 6.0}); - auto mul_first = std::make_shared(clamp, mul_constant); - auto mul_second = std::make_shared(input, mul_first); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.0f, 6.0f); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {1.0 / 6.0}); + auto mul_first = std::make_shared(clamp, mul_constant); + auto mul_second = std::make_shared(input, mul_first); f = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); ngraph::pass::Manager manager; manager.register_pass(); - manager.register_pass(); + manager.register_pass(); manager.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto hswish = std::make_shared(input); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); + + f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); + } + + auto res = compare_functions(f, 
f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, HSwishFusionWithClampDiv) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.0f, 6.0f); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto div = std::make_shared(clamp, div_constant); + auto mul = std::make_shared(input, div); + + f = std::make_shared(ngraph::NodeVector{mul}, ngraph::ParameterVector{input}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); } @@ -185,15 +216,15 @@ TEST(TransformationTests, HSwishFusionWithClamp) { TEST(TransformationTests, HSwishFusionWithReluMulWrongConstValue) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(relu, min_constant); - auto mul_first = std::make_shared(input, min); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.167}); - auto mul_second = std::make_shared(mul_first, mul_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(relu, min_constant); + auto mul_first = std::make_shared(input, min); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.167}); + auto mul_second = std::make_shared(mul_first, mul_constant); f = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); @@ -205,15 +236,15 @@ TEST(TransformationTests, HSwishFusionWithReluMulWrongConstValue) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); - auto min = std::make_shared(relu, min_constant); - auto mul_first = std::make_shared(input, min); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.167}); - auto mul_second = std::make_shared(mul_first, mul_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = 
ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.0}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.0}); + auto min = std::make_shared(relu, min_constant); + auto mul_first = std::make_shared(input, min); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.167}); + auto mul_second = std::make_shared(mul_first, mul_constant); f_ref = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); } @@ -225,15 +256,15 @@ TEST(TransformationTests, HSwishFusionWithReluMulWrongConstValue) { TEST(TransformationTests, HSwishFusionWithReluDivWrongConstValue) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::Shape{}); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.01}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); - auto min = std::make_shared(relu, min_constant); - auto mul = std::make_shared(input, min); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); - auto div = std::make_shared(mul, div_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::Shape{}); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.01}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); + auto min = std::make_shared(relu, min_constant); + auto mul = std::make_shared(input, min); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); + auto div = std::make_shared(mul, div_constant); f = std::make_shared(ngraph::NodeVector{div}, ngraph::ParameterVector{input}); @@ -245,15 +276,15 @@ TEST(TransformationTests, HSwishFusionWithReluDivWrongConstValue) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::Shape{}); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.01}); - auto add = std::make_shared(input, add_constant); - auto relu = std::make_shared(add); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); - auto min = std::make_shared(relu, min_constant); - auto mul = std::make_shared(input, min); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); - auto div = std::make_shared(mul, div_constant); + auto input = std::make_shared(ngraph::element::f16, ngraph::Shape{}); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.01}); + auto add = std::make_shared(input, add_constant); + auto relu = std::make_shared(add); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); + auto min = std::make_shared(relu, min_constant); + auto mul = std::make_shared(input, min); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.0}); + auto div = std::make_shared(mul, div_constant); f_ref = std::make_shared(ngraph::NodeVector{div}, ngraph::ParameterVector{input}); } @@ 
-265,16 +296,16 @@ TEST(TransformationTests, HSwishFusionWithReluDivWrongConstValue) { TEST(TransformationTests, HSwishFusionWithoutReluWrongConstValue) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); - auto add = std::make_shared(input, add_constant); - auto max_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.22}); - auto max = std::make_shared(add, max_constant); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.01}); - auto min = std::make_shared(max, min_constant); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); - auto div = std::make_shared(min, div_constant); - auto mul = std::make_shared(input, div); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); + auto add = std::make_shared(input, add_constant); + auto max_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.22}); + auto max = std::make_shared(add, max_constant); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.01}); + auto min = std::make_shared(max, min_constant); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); + auto div = std::make_shared(min, div_constant); + auto mul = std::make_shared(input, div); f = std::make_shared(ngraph::NodeVector{mul}, ngraph::ParameterVector{input}); @@ -286,16 +317,16 @@ TEST(TransformationTests, HSwishFusionWithoutReluWrongConstValue) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); - auto add = std::make_shared(input, add_constant); - auto max_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.22}); - auto max = std::make_shared(add, max_constant); - auto min_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.01}); - auto min = std::make_shared(max, min_constant); - auto div_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); - auto div = std::make_shared(min, div_constant); - auto mul = std::make_shared(input, div); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); + auto add = std::make_shared(input, add_constant); + auto max_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.22}); + auto max = std::make_shared(add, max_constant); + auto min_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.01}); + auto min = std::make_shared(max, min_constant); + auto div_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {6.002}); + auto div = std::make_shared(min, div_constant); + auto mul = std::make_shared(input, div); f_ref = std::make_shared(ngraph::NodeVector{mul}, ngraph::ParameterVector{input}); } @@ -307,13 +338,13 @@ TEST(TransformationTests, HSwishFusionWithoutReluWrongConstValue) { 
TEST(TransformationTests, HSwishFusionWithClampWrongConstValue) { std::shared_ptr f(nullptr), f_ref(nullptr); { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); - auto add = std::make_shared(input, add_constant); - auto clamp = std::make_shared(add, 0.11f, 6.02f); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.98 / 6.15}); - auto mul_first = std::make_shared(clamp, mul_constant); - auto mul_second = std::make_shared(input, mul_first); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.11f, 6.02f); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.98 / 6.15}); + auto mul_first = std::make_shared(clamp, mul_constant); + auto mul_second = std::make_shared(input, mul_first); f = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); @@ -325,13 +356,13 @@ TEST(TransformationTests, HSwishFusionWithClampWrongConstValue) { } { - auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); - auto add_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); - auto add = std::make_shared(input, add_constant); - auto clamp = std::make_shared(add, 0.11f, 6.02f); - auto mul_constant = ngraph::opset4::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.98 / 6.15}); - auto mul_first = std::make_shared(clamp, mul_constant); - auto mul_second = std::make_shared(input, mul_first); + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto add_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {3.11}); + auto add = std::make_shared(input, add_constant); + auto clamp = std::make_shared(add, 0.11f, 6.02f); + auto mul_constant = ngraph::opset6::Constant::create(ngraph::element::f16, ngraph::Shape{}, {0.98 / 6.15}); + auto mul_first = std::make_shared(clamp, mul_constant); + auto mul_second = std::make_shared(input, mul_first); f_ref = std::make_shared(ngraph::NodeVector{mul_second}, ngraph::ParameterVector{input}); } @@ -339,3 +370,30 @@ TEST(TransformationTests, HSwishFusionWithClampWrongConstValue) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; } + +TEST(TransformationTests, HSwishFusionWithHSigmoidMul) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hsigmoid = std::make_shared(input); + auto mul = std::make_shared(input, hsigmoid); + + f = std::make_shared(ngraph::NodeVector{mul}, ngraph::ParameterVector{input}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ngraph::element::f16, ngraph::PartialShape::dynamic(1)); + auto hswish = std::make_shared(input); + + f_ref = std::make_shared(ngraph::NodeVector{hswish}, ngraph::ParameterVector{input}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git 
a/inference-engine/tests/functional/inference_engine/transformations/mvn6_decomposition_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/mvn6_decomposition_test.cpp index 8fa9f92c9a5478..a98f14d5ed2fae 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/mvn6_decomposition_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/mvn6_decomposition_test.cpp @@ -70,11 +70,11 @@ TEST(TransformationTests, MVN6Decomposition_Inside_Sqrt) { auto mean_normalization = std::make_shared(input0, mean); auto mul = std::make_shared(mean_normalization, mean_normalization); - auto sum = std::make_shared(mul, axes_const, true); + auto mean2 = std::make_shared(mul, axes_const, true); auto eps_node = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1e-5 }); - auto eps_add = std::make_shared(sum, eps_node); + auto eps_add = std::make_shared(mean2, eps_node); auto sqrt = std::make_shared(eps_add); auto div = std::make_shared(mean_normalization, sqrt); @@ -108,13 +108,13 @@ TEST(TransformationTests, MVN6Decomposition_Outside_Sqrt) { auto mean_normalization = std::make_shared(input0, mean); auto mul = std::make_shared(mean_normalization, mean_normalization); - auto sum = std::make_shared(mul, axes_const, true); + auto mean2 = std::make_shared(mul, axes_const, true); auto eps_node = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{ 1 }, { 1e-5 }); - auto sqrt = std::make_shared(sum); + auto sqrt = std::make_shared(mean2); auto eps_add = std::make_shared(sqrt, eps_node); - auto div = std::make_shared(mean_normalization, sqrt); + auto div = std::make_shared(mean_normalization, eps_add); f_ref = std::make_shared(ngraph::NodeVector{ div }, ngraph::ParameterVector{ input0 }); } diff --git a/inference-engine/tests/functional/plugin/cpu/CMakeLists.txt b/inference-engine/tests/functional/plugin/cpu/CMakeLists.txt index a81a784c0ff8c1..32d24a689414b5 100644 --- a/inference-engine/tests/functional/plugin/cpu/CMakeLists.txt +++ b/inference-engine/tests/functional/plugin/cpu/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 # @@ -9,16 +9,24 @@ add_library(cpuSpecificRtInfo STATIC ${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin/uti ${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin/utils/rt_info/memory_formats_attribute.cpp) target_link_libraries(cpuSpecificRtInfo PRIVATE ${NGRAPH_LIBRARIES}) +set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} ${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin) +set(DEPENDENCIES MKLDNNPlugin) +set(LINK_LIBRARIES funcSharedTests cpuSpecificRtInfo) +if (NGRAPH_ONNX_IMPORT_ENABLE AND NOT NGRAPH_USE_PROTOBUF_LITE) + list(APPEND INCLUDES "${OpenVINO_MAIN_SOURCE_DIR}/docs/onnx_custom_op") + list(APPEND LINK_LIBRARIES onnx_custom_op) + list(APPEND DEPENDENCIES template_extension onnx_custom_op) +else() + set(EXCLUDED_SOURCE_PATHS "${CMAKE_CURRENT_SOURCE_DIR}/extension") +endif() + addIeTargetTest( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} - INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} - ${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin - DEPENDENCIES - MKLDNNPlugin - LINK_LIBRARIES - funcSharedTests - cpuSpecificRtInfo + INCLUDES ${INCLUDES} + EXCLUDED_SOURCE_PATHS ${EXCLUDED_SOURCE_PATHS} + DEPENDENCIES ${DEPENDENCIES} + LINK_LIBRARIES ${LINK_LIBRARIES} ADD_CPPLINT LABELS CPU diff --git a/inference-engine/tests/functional/inference_engine/extension.cpp 
b/inference-engine/tests/functional/plugin/cpu/extension/extension.cpp similarity index 98% rename from inference-engine/tests/functional/inference_engine/extension.cpp rename to inference-engine/tests/functional/plugin/cpu/extension/extension.cpp index 74ce5d7e78271d..ac48ca9d7f7b99 100644 --- a/inference-engine/tests/functional/inference_engine/extension.cpp +++ b/inference-engine/tests/functional/plugin/cpu/extension/extension.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -129,7 +129,8 @@ class CustomAbsExtension : public InferenceEngine::IExtension { } }; -void infer_model(InferenceEngine::Core& ie, const std::string& model, const std::vector& input_values, const std::vector& expected) { +void infer_model(InferenceEngine::Core& ie, const std::string& model, + const std::vector& input_values, const std::vector& expected) { InferenceEngine::Blob::CPtr weights; auto network = ie.ReadNetwork(model, weights); auto function = network.getFunction(); @@ -269,8 +270,7 @@ TEST(Extension, XmlModelWithCustomAbs) { static std::string get_extension_path() { - return FileUtils::makePluginLibraryName({}, - std::string("template_extension") + IE_BUILD_POSTFIX); + return FileUtils::makePluginLibraryName({}, std::string("template_extension") + IE_BUILD_POSTFIX); } diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp new file mode 100644 index 00000000000000..34eed9884d0f3d --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "common_test_utils/test_constants.hpp" + +namespace ConfigurationTestsDefinitions { +namespace { +std::vector batch_sizes = { + 1, + 5, + 9, + 16 +}; + +std::map additional_config = { +}; +} // namespace + + +INSTANTIATE_TEST_CASE_P(smoke_DynamicBatchTest_async, DynamicBatchTest, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(batch_sizes), + ::testing::Values(true), + ::testing::Values(additional_config)), + DynamicBatchTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_DynamicBatchTest_sync, DynamicBatchTest, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(batch_sizes), + ::testing::Values(false), + ::testing::Values(additional_config)), + DynamicBatchTest::getTestCaseName); +} // namespace ConfigurationTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/remove_parameter.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/remove_parameter.cpp new file mode 100644 index 00000000000000..c11d876ee38e01 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/remove_parameter.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/remove_parameter.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace ExecutionGraphTests; + +namespace { + 
+INSTANTIATE_TEST_CASE_P(smoke_removeParameter, ExecGraphRemoveParameterNode, + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ExecGraphRemoveParameterNode::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp new file mode 100644 index 00000000000000..7d1d040000564a --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_qdq_transformation.hpp" +#include "low_precision_transformations/convolution_with_incorrect_weights.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32, + // ngraph::element::f16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false), +}; + +const std::vector params = { + // Actual: + // + // Constant + // | Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // FakeQuantize FakeQuantize + // |FP32 |FP32 + // | | + // Convert Constant Convert + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert Constant + // \FP32 /FP32 |FP32 /I8 + // \ / | / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant Constant + // \U8 /U8 /I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + {ngraph::element::f32}, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector{ 15.f }, ngraph::element::f32}, + { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "FP32" + }, + + // Actual: + // + // Constant + // | Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // FakeQuantize FakeQuantize + // |FP32 |FP32 + // | | + // Convert Constant Convert + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert + // \FP32 /FP32 |FP32 + // \ / | + // Subtract Constant | Constant + // \FP32 /FP32 | /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant + // \U8 /U8 + // \ / + // Subtract Constant + // \FP32 /I8 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + 
{ngraph::element::f32}, + {}, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector{ 15.f }, ngraph::element::f32}, + { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + {}, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, + + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |U8 |U8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{0.5f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "FP32" + }, + + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{0.5f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, +}; + +const std::vector shapes = { + { 1, 3, 4, 4 }, + { 4, 3, 4, 4 } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionQDqTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + ConvolutionQDqTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp index e05f97ef3445d4..9ad3f34028a818 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -46,6 +46,30 @@ const std::vector params 
"output_original", "U8" }, + { + { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, + false, + { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } }, + false, + "output", + "FP32" + }, + { + { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + false, + { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f } }, + false, + "output", + "FP32" + }, + { + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, + false, + { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } }, + false, + "output", + "FP32" + }, { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -12.7f }, { 12.8f } }, true, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp index 31f5105ced9699..0b63eabaefd737 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp @@ -28,7 +28,6 @@ const std::vector testValues = } }; -// TODO: add something to avoid cleanup and enable INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeAndTwoOutputBranchesWithConvolutionTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 55691f55d088d2..0ea710a3e98e51 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -34,7 +34,6 @@ const std::vector fakeQuantizeOnD // { 256ul, { 1ul }, { -1.28f} , { 1.27f } } }; -// TODO: add something to avoid cleanup and enable INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp new file mode 100644 index 00000000000000..34fab4b80d0c62 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp" +#include "common_test_utils/test_constants.hpp" +#include "lpt_ngraph_functions/fake_quantize_function.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::pass::low_precision; + 
+namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8AndI8().setUpdatePrecisions(true), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8AndI8().setUpdatePrecisions(false) +}; + +const std::vector fakeQuantizeOnDataValues = { + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {127.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "FP32" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + {}, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "U8" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {127.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "FP32" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "U8" + } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeWithNotOptimalTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fakeQuantizeOnDataValues)), + FakeQuantizeWithNotOptimalTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp index dfc3a09da3ac0d..f7ebcb8fe159e9 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp @@ -20,26 +20,32 @@ std::vector testValues = { { 1, 4, 12, 2 }, { 256ul, ngraph::Shape({}), {0.f}, {25.5f}, {0.f}, {25.5f} }, { 1, 4, 2, 12 }, - { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} } + { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} }, + 
"matMul/1", + "U8" }, { { 8, 4, 12, 2 }, { 256ul, ngraph::Shape({}), {0.f}, {25.5f}, {0.f}, {25.5f} }, { 8, 4, 2, 12 }, - { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} } + { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} }, + "matMul/1", + "U8" }, { { 1, 4, 12, 2 }, { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} }, { 1, 4, 2, 12 }, - { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} } + { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} }, + "matMul/1", + "I8" } }; INSTANTIATE_TEST_CASE_P(smoke_LPT, MatMulTransformation, ::testing::Combine( ::testing::ValuesIn(precisions), - ::testing::Values(InferenceEngine::SizeVector({ 1, 384, 1024 })), + ::testing::Values(ngraph::Shape({ 1, 384, 1024 })), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::ValuesIn(testValues)), MatMulTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp index e19d3959899870..c8deae853f98e9 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp @@ -12,13 +12,55 @@ using namespace InferenceEngine::details; namespace { const std::vector precisions = { ngraph::element::f32 }; +//transpose_a = false, transpose_b = true std::vector testValues = { + // 3D with different values { - { 1, 32 }, - { 256ul, ngraph::Shape({}), {0.f}, {25.5f}, {0.f}, {25.5f} }, - { 32, 10 }, - std::vector(32 * 10, 1.f), - { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} }, + { 2, 3, 4 }, + { 256ul, {{1, 1, 1}, {1, 1, 1}, {1, 3, 1}, {1, 3, 1}}, {0.f}, {255.f}, {0.f, 0.f, 0.f}, {255.f, 25.5f, 255.f} }, + { 2, 4 }, + std::vector(4 * 2, 2.f), + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, + "matMul/FC", + "U8" + }, + // 3D with different values + { + { 1, 3, 4 }, + { 256ul, {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}, {-10.5f}, {4.5f}, {-10.5f}, {4.5f} }, + { 2, 4 }, + std::vector(4 * 2, 2.f), + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, + "matMul/FC", + "U8" + }, + // 4D with different values + { + { 1, 1, 3, 4 }, + { 256ul, {{1, 1, 1}, {1, 1, 1}, {1, 3, 1}, {1, 3, 1}}, {0.f}, {255.f}, {0.f, 0.f, 0.f}, {255.f, 25.5f, 255.f} }, + { 2, 4 }, + std::vector(4 * 2, 2.f), + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, + "matMul/FC", + "U8" + }, + // 3D with the same values + { + { 1, 3, 4 }, + { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {255.f}, {0.f}, {25.5f} }, + { 4, 4 }, + std::vector(4 * 4, 2.f), + { 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} }, + "matMul/FC", + "U8" + }, + // 2D with subtract on activations + { + { 2, 3 }, + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-10.f}, {5.f}, {-10.f, -5.f}, {5.f, 5.f} }, + { 2, 3 }, + std::vector{1, 2, 3, 4, 5, 6}, + { 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} }, "matMul/1", "U8" } diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp index 3973890d55be12..e83e94f8860e16 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp @@ -1,132 +1,148 @@ -// Copyright (C) 2019 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include -#include "single_layer_tests/convolution.hpp" #include "common_test_utils/test_constants.hpp" +#include "single_layer_tests/convolution.hpp" using namespace LayerTestsDefinitions; namespace { const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; + InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16}; + +/* ============= 1D Convolution ============= */ +const std::vector> kernels1D = {{3}, {5}}; +const std::vector> strides1D = {{1}, {3}}; +const std::vector> padBegins1D = {{0}, {3}}; +const std::vector> padEnds1D = {{0}, {3}}; +const std::vector> dilations1D = {{1}, {3}}; +const std::vector numOutChannels1D = {1, 5}; + +const auto conv1DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels1D), ::testing::ValuesIn(strides1D), + ::testing::ValuesIn(padBegins1D), ::testing::ValuesIn(padEnds1D), + ::testing::ValuesIn(dilations1D), ::testing::ValuesIn(numOutChannels1D), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); +const auto conv1DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels1D), ::testing::ValuesIn(strides1D), + ::testing::Values(std::vector({0})), + ::testing::Values(std::vector({0})), + ::testing::ValuesIn(dilations1D), ::testing::ValuesIn(numOutChannels1D), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution1D_ExplicitPadding, ConvolutionLayerTest, + ::testing::Combine( + conv1DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution1D_AutoPadValid, ConvolutionLayerTest, + ::testing::Combine( + conv1DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); /* ============= 2D Convolution ============= */ -const std::vector> kernels = {{3, 3}, - {3, 5}}; -const std::vector> strides = {{1, 1}, - {1, 3}}; -const std::vector> padBegins = {{0, 0}, - {0, 3}}; -const std::vector> padEnds = {{0, 0}, - {0, 3}}; -const std::vector> dilations = {{1, 1}, - {3, 1}}; +const std::vector> kernels = {{3, 3}, {3, 5}}; +const std::vector> strides = {{1, 1}, {1, 3}}; +const std::vector> padBegins = {{0, 0}, {0, 3}}; +const std::vector> padEnds = {{0, 0}, {0, 3}}; +const std::vector> dilations = {{1, 1}, {3, 1}}; const std::vector numOutChannels = {1, 5}; 
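// Note: the kernels/strides/pads/dilations declared above are combined into the 2D
// convolution cases instantiated below. A standalone sketch of the shape rule they
// exercise (the usual EXPLICIT-padding convolution formula, assumed here for
// illustration rather than taken from the test sources):
#include <cstddef>

std::size_t conv_out_dim(std::size_t in, std::size_t kernel, std::size_t stride,
                         std::size_t pad_begin, std::size_t pad_end, std::size_t dilation) {
    // Dilation widens the effective kernel; padding extends the input on both sides;
    // integer division gives the implied floor.
    const std::size_t effective_kernel = dilation * (kernel - 1) + 1;
    return (in + pad_begin + pad_end - effective_kernel) / stride + 1;
}
// Example: for the {1, 3, 30, 30} input used below with kernel {3, 3}, stride {1, 1},
// pads {0, 3}/{0, 3} and dilation {3, 1}, the spatial output is
// (30 + 0 + 0 - 7) / 1 + 1 = 24 rows by (30 + 3 + 3 - 3) / 1 + 1 = 34 columns.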
-const std::vector padTypes = { - ngraph::op::PadType::EXPLICIT, - ngraph::op::PadType::VALID -}; const auto conv2DParams_ExplicitPadding = ::testing::Combine( - ::testing::ValuesIn(kernels), - ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::ValuesIn(dilations), - ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) -); + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); const auto conv2DParams_AutoPadValid = ::testing::Combine( - ::testing::ValuesIn(kernels), - ::testing::ValuesIn(strides), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilations), - ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID) -); - -INSTANTIATE_TEST_CASE_P(smoke_Convolution2D_ExplicitPadding, ConvolutionLayerTest, - ::testing::Combine( - conv2DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_Convolution2D_AutoPadValid, ConvolutionLayerTest, - ::testing::Combine( - conv2DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionLayerTest::getTestCaseName); -/* ============= 3D Convolution ============= */ -const std::vector> kernels3d = {{3, 3, 3}, - {3, 5, 3}}; -const std::vector> paddings3d = {{0, 0, 0}, - {0, 2, 0}}; + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution2D_ExplicitPadding, ConvolutionLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution2D_AutoPadValid, ConvolutionLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30, 30})), + 
::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); -const std::vector> strides3d = {{1, 1, 1}, - {1, 2, 1}}; -const std::vector> dilations3d = {{1, 1, 1}, - {1, 2, 1}}; +/* ============= 3D Convolution ============= */ +const std::vector> kernels3d = {{3, 3, 3}, {3, 5, 3}}; +const std::vector> paddings3d = {{0, 0, 0}, {0, 2, 0}}; +const std::vector> strides3d = {{1, 1, 1}, {1, 2, 1}}; +const std::vector> dilations3d = {{1, 1, 1}, {1, 2, 1}}; +const std::vector numOutChannels3D = {1, 5}; const auto conv3DParams_ExplicitPadding = ::testing::Combine( - ::testing::ValuesIn(kernels3d), - ::testing::ValuesIn(strides3d), - ::testing::ValuesIn(paddings3d), - ::testing::ValuesIn(paddings3d), - ::testing::ValuesIn(dilations3d), - ::testing::Values(5), - ::testing::Values(ngraph::op::PadType::EXPLICIT) -); + ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(paddings3d), ::testing::ValuesIn(paddings3d), + ::testing::ValuesIn(dilations3d), ::testing::ValuesIn(numOutChannels3D), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); const auto conv3DParams_AutoPadValid = ::testing::Combine( - ::testing::ValuesIn(kernels3d), - ::testing::ValuesIn(strides3d), - ::testing::Values(std::vector({0, 0, 0})), - ::testing::Values(std::vector({0, 0, 0})), - ::testing::ValuesIn(dilations3d), - ::testing::Values(5), - ::testing::Values(ngraph::op::PadType::VALID) -); - -INSTANTIATE_TEST_CASE_P(smoke_Convolution3D_ExplicitPadding, ConvolutionLayerTest, - ::testing::Combine( - conv3DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 10, 10, 10})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_Convolution3D_AutoPadValid, ConvolutionLayerTest, - ::testing::Combine( - conv3DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 10, 10, 10})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionLayerTest::getTestCaseName); + ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3d), ::testing::ValuesIn(numOutChannels3D), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution3D_ExplicitPadding, ConvolutionLayerTest, + ::testing::Combine( + conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 10, 10, 10})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_Convolution3D_AutoPadValid, ConvolutionLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 10, 10, 10})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp index e128cf36e6c2ee..47dd838d6e4d97 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -15,6 +15,7 @@ const std::vector netPrecisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 }; +std::vector mergeRepeated{true, false}; const auto basicCases = ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -22,12 +23,18 @@ const auto basicCases = ::testing::Combine( ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 10, 1, 16 }), - std::vector({ 20, 2, 8 })), - ::testing::Values(true/*, false - current implementation of CPU greedy decoder always merge_repeated */), + ::testing::Values(std::vector({ 50, 3, 3 }), + std::vector({ 50, 3, 7 }), + std::vector({ 50, 3, 8 }), + std::vector({ 50, 3, 16 }), + std::vector({ 50, 3, 128 }), + std::vector({ 50, 3, 49 }), + std::vector({ 50, 3, 55 }), + std::vector({ 1, 1, 16 })), + ::testing::ValuesIn(mergeRepeated), ::testing::Values(CommonTestUtils::DEVICE_CPU)); -INSTANTIATE_TEST_CASE_P(smoke_CTC_Greedy_decoder_Basic, CTCGreedyDecoderLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_CtcGreedyDecoderBasic, CTCGreedyDecoderLayerTest, basicCases, CTCGreedyDecoderLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp new file mode 100644 index 00000000000000..399e4a36702293 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "single_layer_tests/ctc_greedy_decoder_seq_len.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::helpers; + +namespace { + +std::vector> inputShape{{1, 1, 1}, {1, 6, 10}, {3, 3, 16}, {5, 3, 55}}; + +const std::vector probPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; +const std::vector idxPrecisions = { + InferenceEngine::Precision::I32, + InferenceEngine::Precision::I64 +}; + +std::vector mergeRepeated{true, false}; + +const auto basicCases = ::testing::Combine( + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(probPrecisions), + ::testing::ValuesIn(idxPrecisions), + 
::testing::Values(0), + ::testing::ValuesIn(mergeRepeated), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +INSTANTIATE_TEST_CASE_P(smoke_set1, CTCGreedyDecoderSeqLenLayerTest, + basicCases, + CTCGreedyDecoderSeqLenLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_set2, CTCGreedyDecoderSeqLenLayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{2, 8, 11}, {4, 10, 55}}), + ::testing::ValuesIn(probPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector{0, 5, 10}), + ::testing::ValuesIn(mergeRepeated), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CTCGreedyDecoderSeqLenLayerTest::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp index 2c88ef93189007..1e47d00db51d72 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp @@ -14,7 +14,9 @@ const std::vector> shapes = { {9, 15}, {16, 10, 12}, {5, 14, 5, 7}, - {7, 8, 6, 7, 13} + {7, 8, 6, 7, 13}, + {2, 3, 4, 2, 3, 5}, + {4, 3, 6, 2, 3, 4, 5, 2, 3, 4}, }; const std::vector inputPrecision = { @@ -25,14 +27,14 @@ const std::vector inputPrecision = { InferenceEngine::Precision::FP32 }; -const std::vector axes = { 0, 1, 2, 3, 4 }; -const std::vector negativeAxes = { -1, -2, -3, -4, -5 }; +const std::vector axes = { 0, 1, 2, 3, 4, 5, 6}; +const std::vector negativeAxes = { -1, -2, -3, -4, -5, -6 }; const std::vector exclusive = {true, false}; const std::vector reverse = {true, false}; const auto testCasesNegativeAxis = ::testing::Combine( - ::testing::Values(std::vector{4, 16, 3, 6, 5}), + ::testing::Values(std::vector{4, 16, 3, 6, 5, 2}), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(negativeAxes), ::testing::ValuesIn(exclusive), @@ -85,10 +87,29 @@ const auto testCasesAxis_4 = ::testing::Combine( ::testing::Values(CommonTestUtils::DEVICE_CPU) ); +const auto testCasesAxis_5 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 5, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[5]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_6 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 6, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[6]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_negative_axis, CumSumLayerTest, testCasesNegativeAxis, CumSumLayerTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_0, CumSumLayerTest, testCasesAxis_0, CumSumLayerTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_1, CumSumLayerTest, testCasesAxis_1, CumSumLayerTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_2, CumSumLayerTest, testCasesAxis_2, CumSumLayerTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_3, CumSumLayerTest, testCasesAxis_3, CumSumLayerTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_4, CumSumLayerTest, testCasesAxis_4, 
CumSumLayerTest::getTestCaseName); - +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_5, CumSumLayerTest, testCasesAxis_5, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_6, CumSumLayerTest, testCasesAxis_6, CumSumLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution.cpp index 54488970d994d2..808690c111006c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution.cpp @@ -4,121 +4,153 @@ #include -#include "single_layer_tests/group_convolution.hpp" #include "common_test_utils/test_constants.hpp" +#include "single_layer_tests/group_convolution.hpp" using namespace LayerTestsDefinitions; namespace { const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 -}; + InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16}; + +/* ============= 1D GroupConvolution ============= */ +const std::vector> kernels1d = {{3}}; +const std::vector> strides1d = {{1}}; +const std::vector> padBegins1d = {{0}}; +const std::vector> padEnds1d = {{0}}; +const std::vector> dilations1d = {{1}}; +const std::vector numOutChannels1d = {8, 16}; +const std::vector numGroups1d = {2, 8}; +const auto inputShapes1d = std::vector({1, 16, 30}); + +const auto groupConv1DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels1d), ::testing::ValuesIn(strides1d), + ::testing::ValuesIn(padBegins1d), ::testing::ValuesIn(padEnds1d), + ::testing::ValuesIn(dilations1d), ::testing::ValuesIn(numOutChannels1d), + ::testing::ValuesIn(numGroups1d), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); +const auto groupConv1DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels1d), ::testing::ValuesIn(strides1d), + ::testing::Values(std::vector({0})), + ::testing::Values(std::vector({0})), + ::testing::ValuesIn(dilations1d), ::testing::ValuesIn(numOutChannels1d), + ::testing::ValuesIn(numGroups1d), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution1D_ExplicitPadding, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv1DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector(inputShapes1d)), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution1D_AutoPadValid, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv1DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 16, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); /* ============= 2D GroupConvolution ============= */ -const std::vector> kernels = {{3, 3}}; -const std::vector> 
strides = {{1, 1}}; +const std::vector> kernels = {{3, 3}}; +const std::vector> strides = {{1, 1}}; const std::vector> padBegins = {{0, 0}}; const std::vector> padEnds = {{0, 0}}; -const std::vector> dilations = {{1, 1}}; +const std::vector> dilations = {{1, 1}}; const std::vector numOutChannels = {8, 16}; const std::vector numGroups = {2, 8}; +const auto inputShapes = std::vector({1, 16, 30, 30}); const auto groupConv2DParams_ExplicitPadding = ::testing::Combine( - ::testing::ValuesIn(kernels), - ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::ValuesIn(dilations), - ::testing::ValuesIn(numOutChannels), - ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::EXPLICIT) -); + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); const auto groupConv2DParams_AutoPadValid = ::testing::Combine( - ::testing::ValuesIn(kernels), - ::testing::ValuesIn(strides), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilations), - ::testing::ValuesIn(numOutChannels), - ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::VALID) -); - -INSTANTIATE_TEST_CASE_P(smoke_GroupConvolution2D_ExplicitPadding, GroupConvolutionLayerTest, - ::testing::Combine( - groupConv2DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - GroupConvolutionLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_GroupConvolution2D_AutoPadValid, GroupConvolutionLayerTest, - ::testing::Combine( - groupConv2DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - GroupConvolutionLayerTest::getTestCaseName); + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution2D_ExplicitPadding, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector(inputShapes)), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution2D_AutoPadValid, GroupConvolutionLayerTest, + ::testing::Combine( + 
groupConv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 16, 30, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); /* ============= 3D GroupConvolution ============= */ -const std::vector> kernels3d = {{3, 3, 3}}; +const std::vector> kernels3d = {{3, 3, 3}}; const std::vector> paddings3d = {{0, 0, 0}}; - -const std::vector> strides3d = {{1, 1, 1}}; -const std::vector> dilations3d = {{1, 1, 1}}; +const std::vector> strides3d = {{1, 1, 1}}; +const std::vector> dilations3d = {{1, 1, 1}}; +const auto inputShapes3d = std::vector({1, 4, 10, 10, 10}); const auto groupConv3DParams_ExplicitPadding = ::testing::Combine( - ::testing::ValuesIn(kernels3d), - ::testing::ValuesIn(strides3d), - ::testing::ValuesIn(paddings3d), - ::testing::ValuesIn(paddings3d), - ::testing::ValuesIn(dilations3d), - ::testing::Values(4), - ::testing::Values(2), - ::testing::Values(ngraph::op::PadType::EXPLICIT) -); + ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(paddings3d), ::testing::ValuesIn(paddings3d), + ::testing::ValuesIn(dilations3d), ::testing::Values(4), + ::testing::Values(2), ::testing::Values(ngraph::op::PadType::EXPLICIT)); const auto groupConv3DParams_AutoPadValid = ::testing::Combine( - ::testing::ValuesIn(kernels3d), - ::testing::ValuesIn(strides3d), - ::testing::Values(std::vector({0, 0, 0})), - ::testing::Values(std::vector({0, 0, 0})), - ::testing::ValuesIn(dilations3d), - ::testing::Values(4), - ::testing::Values(2), - ::testing::Values(ngraph::op::PadType::VALID) -); - -INSTANTIATE_TEST_CASE_P(smoke_GroupConvolution3D_ExplicitPadding, GroupConvolutionLayerTest, - ::testing::Combine( - groupConv3DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 10, 10, 10})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - GroupConvolutionLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_GroupConvolution3D_AutoPadValid, GroupConvolutionLayerTest, - ::testing::Combine( - groupConv3DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 10, 10, 10})), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - GroupConvolutionLayerTest::getTestCaseName); + ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3d), ::testing::Values(4), + ::testing::Values(2), ::testing::Values(ngraph::op::PadType::VALID)); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution3D_ExplicitPadding, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector(inputShapes3d)), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + smoke_GroupConvolution3D_AutoPadValid, GroupConvolutionLayerTest, + ::testing::Combine( + groupConv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 10, 10, 10})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp index a2fceb3d82cca0..20f4be614eef42 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/interpolate.cpp @@ -20,10 +20,6 @@ const std::vector> inShapes = { {1, 4, 30, 30}, }; -const std::vector> targetShapes = { - {1, 4, 40, 40}, -}; - const std::vector modesWithoutNearest = { ngraph::op::v4::Interpolate::InterpolateMode::linear, ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx, @@ -78,8 +74,12 @@ const std::vector> defaultAxes = { {2, 3} }; +const std::vector> targetShapes = { + {40, 40}, +}; + const std::vector> defaultScales = { - {1.33333f, 1.33333f} + {1.333333f, 1.333333f} }; const auto interpolateCasesWithoutNearest = ::testing::Combine( @@ -135,7 +135,7 @@ const std::vector> targetShapesTailTest = { }; const std::vector> defaultScalesTailTest = { - {0.33333f, 1.36666f} + {0.333333f, 1.366666f} }; const auto interpolateCasesWithoutNearestTail = ::testing::Combine( diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp index cd789208ea5e5f..c682b0faa3979c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -49,3 +49,131 @@ const auto MvnCases = ::testing::Combine( INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsMVN, MvnLayerTest, MvnCases, MvnLayerTest::getTestCaseName); + + +std::vector dataPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +std::vector idxPrecisions = { + InferenceEngine::Precision::I32, + InferenceEngine::Precision::I64 +}; + +const std::vector epsMode = { + "inside_sqrt", + "outside_sqrt" +}; + +const std::vector epsilonF = { + 0.0001 +}; + +INSTANTIATE_TEST_CASE_P(smoke_MVN_5D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{1, 10, 5, 7, 8}, {1, 3, 8, 9, 49}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{1, 2, 3, 4}, {2, 3, 4}}), + 
::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_MVN_4D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{1, 10, 5, 17}, {1, 3, 8, 9}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{1, 2, 3}, {2, 3}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_MVN_3D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{1, 32, 17}, {1, 37, 9}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{1, 2}, {2}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_MVN_2D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{3, 5}, {2, 55}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{1}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_MVN_1D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{3}, {9}, {55}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{0}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Decomposition_1D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{3}, {9}, {55}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Decomposition_3D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{1, 32, 17}, {1, 37, 9}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{0, 1, 2}, {0}, {1}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Decomposition_4D, Mvn6LayerTest, + ::testing::Combine( + ::testing::ValuesIn(std::vector>{{1, 16, 5, 8}, {2, 19, 5, 10}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{0, 1, 2, 3}, {0, 1, 2}, {0, 3}, {0}, {1}, {2}, {3}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Decomposition_10D, Mvn6LayerTest, + ::testing::Combine( + 
::testing::ValuesIn(std::vector>{{1, 3, 5, 4, 2, 6, 5, 3, 2, 1}}), + ::testing::ValuesIn(dataPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(std::vector>{{0, 1, 5, 8, 9}, {0, 1, 2, 3}, {0, 1, 2}, {0, 3}, {0}, {3}, {5}, {9}}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilonF), + ::testing::ValuesIn(epsMode), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Mvn6LayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roi_pooling.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roi_pooling.cpp index bc244cf5b571ca..b67e0bd252bf83 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roi_pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roi_pooling.cpp @@ -22,6 +22,7 @@ const std::vector> pooledShapes_max = { }; const std::vector> pooledShapes_bilinear = { + {1, 1}, {2, 2}, {3, 3}, {6, 6} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 3291162508f54c..18348d17584b45 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -34,8 +34,11 @@ std::vector disabledTestPatterns() { R"(.*(CoreThreadingTestsWithIterations).*(smoke_LoadNetworkAccuracy).*)", #endif // TODO: Issue: 43793 - R"(.*(PreprocessTest).*(SetScalePreProcess).*)", - R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcess).*)", + R"(.*(PreprocessTest).*(SetScalePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(SetScalePreProcessGetBlob).*)", + R"(.*(PreprocessTest).*(SetMeanValuePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(SetMeanImagePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcessGetBlob).*)", // TODO: Issue: 40957 R"(.*(ConstantResultSubgraphTest).*)", // TODO: Issue: 34348 @@ -56,8 +59,8 @@ std::vector disabledTestPatterns() { R"(.*Broadcast.*mode=BIDIRECTIONAL.*inNPrec=BOOL.*)", // TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform R"(.*decomposition1_batch=5_hidden_size=10_input_size=30_.*tanh.relu.*_clip=0_linear_before_reset=1.*_targetDevice=CPU_.*)", - // TODO: Sporadic Issue: 45163 - R"(.*Behavior.*CancellationTests.*canResetAfterCancelAsyncRequest.*)", + // TODO: Issue 47556. [NGraph] CTCGreedyDecoderSeqLen. Invalid type transformation i64 to i32. 
+ R"(.*(CTCGreedyDecoderSeqLenLayerTest).*(idxPRC=I64).*)", }; if (!InferenceEngine::with_cpu_x86_avx512_core()) { diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/tensor_names.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/tensor_names.cpp new file mode 100644 index 00000000000000..99ceae1156ac85 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/tensor_names.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/tensor_names.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { + INSTANTIATE_TEST_CASE_P(smoke_Check, TensorNamesTest, + ::testing::Values(CommonTestUtils::DEVICE_CPU), + TensorNamesTest::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp new file mode 100755 index 00000000000000..90ef29a9cc2701 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -0,0 +1,352 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" +#include + + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { +using LayerTestsDefinitions::convBackpropDataSpecificParams; +using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet; + +typedef std::tuple< + convBackpropDataLayerTestParamsSet, + CPUSpecificParams, + fusingSpecificParams, + std::map > deconvLayerCPUTestParamsSet; + +class DeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + convBackpropDataLayerTestParamsSet basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::map additionalConfig; + std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + + std::ostringstream result; + result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( + basicParamsSet, 0)); + + result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto& item : additionalConfig) { + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } +protected: + void SetUp() override { + convBackpropDataLayerTestParamsSet basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::map additionalConfig; + std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + convBackpropDataSpecificParams convParams; + std::vector inputShape; + auto netPrecision = 
InferenceEngine::Precision::UNSPECIFIED; + std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = basicParamsSet; + + if (inPrc == Precision::UNSPECIFIED) { + selectedType += std::string("_") + Precision(Precision::FP32).name(); + } else { + selectedType += std::string("_") + inPrc.name(); + } + + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape }); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); + + auto deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + + function = makeNgraphFunction(ngPrc, inputParams, deconvolutionNode, "convolutionBackpropData"); + } +}; + +TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "Deconvolution"); +} + +namespace { + +/* COMMON PARAMS */ +const std::vector fusingParamsSet{ + emptyFusingSpec, + fusingScaleShift +}; + +const std::map cpuEmptyPluginConfig; +const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; + +/* ============= Deconvolution params (planar layout) ============= */ +const SizeVector numOutChannels_Planar = { 6 }; + +/* ============= Deconvolution params (blocked layout) ============= */ +const SizeVector numOutChannels_Blocked = { 64 }; + +/* ============= Deconvolution params (2D) ============= */ +const std::vector kernels2d = { {3, 3}, {1, 1} }; +const std::vector strides2d = { {1, 1}, {2, 2} }; +const std::vector> padBegins2d = { {0, 0} }; +const std::vector> padEnds2d = { {0, 0} }; +const std::vector dilations2d = { {1, 1} }; + +/* ============= Deconvolution params (3D) ============= */ +const std::vector kernels3d = { {3, 3, 3}, {1, 1, 1} }; +const std::vector strides3d = { {1, 1, 1}, {2, 2, 2} }; +const std::vector> padBegins3d = { {0, 0, 0} }; +const std::vector> padEnds3d = { {0, 0, 0} }; +const std::vector dilations3d = { {1, 1, 1} }; +/* ============= */ + +/* INSTANCES */ +/* ============= Deconvolution (Planar 2D) ============= */ +const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Planar), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + 
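// Note: a standalone sketch (illustration only, not from the test sources) of the
// transposed-convolution shape rule behind these ConvolutionBackpropData cases,
// assuming zero output padding as the parameter sets above imply:
#include <cstddef>

std::size_t deconv_out_dim(std::size_t in, std::size_t kernel, std::size_t stride,
                           std::size_t pad_begin, std::size_t pad_end, std::size_t dilation) {
    // A transposed convolution expands the input instead of shrinking it.
    return stride * (in - 1) + dilation * (kernel - 1) + 1 - pad_begin - pad_end;
}
// Example: the { 2, 12, 7, 7 } input above with kernel {3, 3}, stride {2, 2} and no
// padding produces a 2 * 6 + 2 + 1 = 15 x 15 spatial output.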
+INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::BF16), + ::testing::Values(Precision::BF16), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupDeconvolution (Planar 3D) ============= */ +const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( + ::testing::ValuesIn(kernels3d), + ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(padBegins3d), + ::testing::ValuesIn(padEnds3d), + ::testing::ValuesIn(dilations3d), + ::testing::ValuesIn(numOutChannels_Planar), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::BF16), + ::testing::Values(Precision::BF16), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupDeconvolution (Blocked 2D) ============= */ +const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + 
::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::BF16), + ::testing::Values(Precision::BF16), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupDeconvolution (Blocked 3D) ============= */ +const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine( + ::testing::ValuesIn(kernels3d), + ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(padBegins3d), + ::testing::ValuesIn(padEnds3d), + ::testing::ValuesIn(dilations3d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::BF16), + ::testing::Values(Precision::BF16), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Kernel_1x1 (2D) ============= */ + +const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine( + ::testing::Values(SizeVector({1, 1})), + ::testing::Values(SizeVector({1, 1})), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(SizeVector({1, 1})), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_2D, + 
::testing::Values(Precision::FP32), + ::testing::Values(Precision::BF16), + ::testing::Values(Precision::BF16), + ::testing::Values(Layout::ANY), + ::testing::Values(Layout::ANY), + ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ========= */ + +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000000..5ca327ff39d08f --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,288 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace InferenceEngine; +using namespace ngraph; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using fqSpecificParams = std::tuple, // output low + std::vector, // output high + std::vector, // 'range' inputs shapes + size_t>; // levels + +using fqLayerTestParamsSet = std::tuple, std::vector>, // il and ih values + bool, // should be decomposed + CPUSpecificParams>; + +class FakeQuantizeLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + std::pair, std::vector> inputRangesValues; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector inputLow, inputHigh, outputLow, outputHigh; + std::vector inRangesShapes; + size_t levels; + inputLow = inputRangesValues.first; + inputHigh = inputRangesValues.second; + std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_"; + result << "inPrec=" << inPrec.name() << "_"; + + std::string rs = ""; + for (size_t i = 0; i < inRangesShapes.size(); i++) { + rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_"; + } + result << "RS=" << rs; + result << "LOW_BOUNDS=" << inDataLowBounds << "_"; + result << "HIGH_BOUNDS=" << inDataHighBounds << "_"; + result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_"; + result << "IH=" << CommonTestUtils::vec2str(inputHigh) << "_"; + result << "OL=" << CommonTestUtils::vec2str(outputLow) << "_"; + result << "OH=" << CommonTestUtils::vec2str(outputHigh) << "_"; + result << "LEVELS=" << levels; + + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); + } + + void Infer() override { + inferRequest = executableNetwork.CreateInferRequest(); + inputs.clear(); + + const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo(); + auto input = inDataMap.begin(); + + Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds); + 
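        // Note: FuncTestUtils::createAndFillBlob(desc, range, start) fills the blob with random values
        // starting at 'start' and spanning 'range', so passing (inDataHighBounds - inDataLowBounds,
        // inDataLowBounds) produces input data that covers the whole [inDataLowBounds, inDataHighBounds]
        // interval around the FakeQuantize input range.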
inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + + inferRequest.Infer(); + } + +protected: + std::string layerName; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + std::pair, std::vector> inputRangesValues; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + std::vector inRangesShapes; + size_t levels; + std::vector> rangesBounds(RANGES_INPUT_NUMBER); + rangesBounds[0] = inputRangesValues.first; + rangesBounds[1] = inputRangesValues.second; + std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams; + + auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + ParameterVector params = builder::makeParams(ngInPrec, {inDataShape}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + + auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty()); + auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty()); + auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty()); + auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty()); + auto fq = std::make_shared(paramOuts[0], il, ih, ol, oh, levels); + + layerName = shouldBeDecomposed ? "" : "Quantize"; + + if (selectedType.empty()) { + selectedType = getPrimitiveType() + "_" + inPrec.name(); + } + + fq->get_rt_info() = getCPUInfo(); + + function = std::make_shared(fq, params, "FakeQuantizeCPU"); + } + +private: + const size_t RANGES_INPUT_NUMBER = 4; + + int64_t inDataLowBounds, inDataHighBounds; +}; + +TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) { + Run(); + + CheckPluginRelatedResults(executableNetwork, layerName); +} + + +const std::vector levels = {16, 255, 256}; + +int64_t dataLowBounds{-10}, dataHighBounds{10}; + +const std::vector, std::vector>> input_ranges = { + {{0.0f}, {5.f}}, + {{-10.0f}, {-5.f}} +}; + +const std::vector outputLow{5.0f}, outputHigh{25.0f}; + +namespace fqImpl { + +std::vector memForm4D_jit = { + CPUSpecificParams({nchw}, {nchw}, {}, {}), + CPUSpecificParams({nhwc}, {nhwc}, {}, {}), + CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}) +}; + +const std::vector> rangesShapes4D_jit = { + {{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}} +}; + +const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D_jit), + ::testing::ValuesIn(levels)); +const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_jit))); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_jit, FakeQuantizeLayerCPUTest, testParams4D_jit, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm4D_ref = { + CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"}) +}; + +const std::vector> 
rangesShapes4D_ref = { + {{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}} +}; + +const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D_ref), + ::testing::ValuesIn(levels)); +const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(memForm4D_ref)); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_ref, FakeQuantizeLayerCPUTest, testParams4D_ref, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D_jit = { + CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}), + CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), + CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}) +}; + +const std::vector> rangesShapes5D_jit = { + {{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}} +}; + +const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D_jit), + ::testing::ValuesIn(levels)); +const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUSpecificParams(memForm5D_jit))); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_jit, FakeQuantizeLayerCPUTest, testParams5D_jit, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D_ref = { + CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"}) +}; + +const std::vector> rangesShapes5D_ref = { + {{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}} +}; + +const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D_ref), + ::testing::ValuesIn(levels)); +const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(memForm5D_ref)); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqImpl + +const std::vector dataShapes = { + {4, 5, 6, 7}, + {3, 4, 5, 6, 7}, + {2, 3, 4, 5, 6, 7}, +}; + +const std::vector> rangesShapes = { + {{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}, + {{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}, + {{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}} +}; + +namespace fqDecompos { + +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = 
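// Note: in the decomposition cases below shouldBeDecomposed is true and CPUSpecificParams is left empty,
// so SetUp() assigns an empty layerName and CheckPluginRelatedResults() skips the primitive-type check
// (it now returns early for an empty node type, see the cpu_test_utils.cpp hunk later in this patch);
// only correctness against the reference implementation is verified.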
::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_Decompos, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompos + +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp new file mode 100755 index 00000000000000..f4f512d8988c38 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp @@ -0,0 +1,380 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using groupConvBackpropDataLayerTestParamsSet = LayerTestsDefinitions::groupConvBackpropDataLayerTestParamsSet; +using groupConvBackpropDataSpecificParams = LayerTestsDefinitions::groupConvBackpropDataSpecificParams; + +typedef std::tuple< + groupConvBackpropDataLayerTestParamsSet, + CPUSpecificParams, + fusingSpecificParams, + std::map> groupDeconvLayerCPUTestParamsSet; + +class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + groupConvBackpropDataLayerTestParamsSet basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::map additionalConfig; + std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + + std::ostringstream result; + result << LayerTestsDefinitions::GroupConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( + basicParamsSet, 0)); + + result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto& item : additionalConfig) { + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } + +protected: + void SetUp() { + groupConvBackpropDataLayerTestParamsSet basicParamsSet; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::map additionalConfig; + std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + groupConvBackpropDataSpecificParams groupConvParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(groupConvParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = basicParamsSet; + + if (inPrc == Precision::UNSPECIFIED) { + selectedType += std::string("_") + Precision(Precision::FP32).name(); + } else { + selectedType += std::string("_") + inPrc.name(); + } + + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels, 
numGroups; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType) = groupConvParams; + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + auto groupConv = std::dynamic_pointer_cast( + ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, numGroups)); + function = makeNgraphFunction(ngPrc, params, groupConv, "groupConvolutionBackpropData"); + } +}; + +TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "Deconvolution"); +} + +namespace { + +/* GROUP CONV TEST UTILS */ +std::vector filterParamsSetForDevice(std::vector paramsSet) { + std::vector resParamsSet; + const int cpuParamsIndex = 1; + const int selectedTypeIndex = 3; + + for (auto param : paramsSet) { + auto cpuParams = std::get(param); + auto selectedTypeStr = std::get(cpuParams); + + if (selectedTypeStr.find("jit") != std::string::npos && !with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("avx512") != std::string::npos && !with_cpu_x86_avx512f()) + continue; + + resParamsSet.push_back(param); + } + + return resParamsSet; +} +/* ===================== */ + +/* COMMON PARAMS */ +std::vector fusingParamsSet { + emptyFusingSpec, + fusingScaleShift, +}; +const std::map cpuEmptyPluginConfig; +const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; + +/* ============= GroupConvolution params (planar layout) ============= */ +const SizeVector numOutChannels_Planar = {6}; +const SizeVector numGroups_Planar = {2, 3}; + +/* ============= GroupConvolution params (blocked layout) ============= */ +const SizeVector numOutChannels_Blocked = {64}; +const SizeVector numGroups_Blocked = {2, 4}; + +/* ============= GroupConvolution params (DW) ============= */ +const SizeVector numOutChannels_DW = {32}; +const SizeVector numGroups_DW = {32}; + +/* ============= GroupConvolution params (2D) ============= */ +const std::vector kernels2d = {{3, 3}, {1, 1}}; +const std::vector strides2d = {{1, 1}, {2, 2}}; +const std::vector> padBegins2d = {{0, 0}}; +const std::vector> padEnds2d = {{0, 0}}; +const std::vector dilations2d = {{1, 1}}; + +/* ============= GroupConvolution params (3D) ============= */ +const std::vector kernels3d = {{3, 3, 3}, {1, 1, 1}}; +const std::vector strides3d = {{1, 1, 1}, {2, 2, 2}}; +const std::vector> padBegins3d = {{0, 0, 0}}; +const std::vector> padEnds3d = {{0, 0, 0}}; +const std::vector dilations3d = {{1, 1, 1}}; +/* ============= */ + + +/* INSTANCES */ +/* ============= GroupConvolution (Planar 2D) ============= */ +const auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Planar), + ::testing::ValuesIn(numGroups_Planar), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::Values(Precision::FP32), + 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupConvolution (Planar 3D) ============= */ +const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine( + ::testing::ValuesIn(kernels3d), + ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(padBegins3d), + ::testing::ValuesIn(padEnds3d), + ::testing::ValuesIn(dilations3d), + ::testing::ValuesIn(numOutChannels_Planar), + ::testing::ValuesIn(numGroups_Planar), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_3D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_3D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupConvolution (Blocked 2D) ============= */ +const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + 
::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 64, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 64, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupConvolution (Blocked 3D) ============= */ +const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine( + ::testing::ValuesIn(kernels3d), + ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(padBegins3d), + ::testing::ValuesIn(padEnds3d), + ::testing::ValuesIn(dilations3d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_3D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_3D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + 
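        // Note: the BF16 variants pair inPrc/outPrc = BF16 with cpuBF16PluginConfig (KEY_ENFORCE_BF16 = YES,
        // defined above), forcing the plugin onto the bfloat16 deconvolution path; CheckPluginRelatedResults()
        // is then expected to confirm that an avx512 primitive was selected.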
GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupConvolution (DW 2D) ============= */ +const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_DW), + ::testing::ValuesIn(numGroups_DW), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_DW_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 32, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GroupDeconv_2D_DW_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Precision::BF16), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({ 2, 32, 7, 7 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); +} // namespace + +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index 0293e4b5adf34a..fc037336aeddb5 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -287,6 +287,97 @@ INSTANTIATE_TEST_CASE_P(smoke_Interpolate_Enforced_Bf16_Layout_Test, Interpolate ::testing::ValuesIn(filterCPUInfoForDevice())), InterpolateLayerCPUTest::getTestCaseName); + +////////////////////////5D///////////////////////////// +std::vector filterCPUInfoForDevice5D() { + std::vector resCPUParams; + if (with_cpu_x86_avx512f()) { + resCPUParams.push_back(CPUSpecificParams{{nCdhw16c, x, x}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x}, {ndhwc}, {"jit_avx512"}, "jit_avx512_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x}, {ncdhw}, {"jit_avx512"}, "jit_avx512_FP32"}); + } else if (with_cpu_x86_avx2()) { + resCPUParams.push_back(CPUSpecificParams{{nCdhw8c, x, x}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x}, {ndhwc}, {"jit_avx2"}, "jit_avx2_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x}, {ncdhw}, {"jit_avx2"}, "jit_avx2_FP32"}); + } else if (with_cpu_x86_sse42()) { + resCPUParams.push_back(CPUSpecificParams{{nCdhw8c, x, x}, {nCdhw8c}, {"jit_sse42"}, 
"jit_sse42_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x}, {ndhwc}, {"jit_sse42"}, "jit_sse42_FP32"}); + resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x}, {ncdhw}, {"jit_sse42"}, "jit_sse42_FP32"}); + } else { + resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x}, {ncdhw}, {"ref"}, "ref_FP32"}); + } + return resCPUParams; +} + +const std::vector> pads5D = { + {0, 0, 0, 0, 0} +}; + +const std::vector> defaultAxes5D = { + {2, 3, 4} +}; + +const std::vector> defaultScales5D = { + {1.25f, 1.5f, 1.5f} +}; + +const auto interpolateCasesLinearOnnx5D = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), + ::testing::ValuesIn(shapeCalculationMode), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(nearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes5D), + ::testing::ValuesIn(defaultScales5D)); + +const auto interpolateCasesNN5D = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), + ::testing::ValuesIn(shapeCalculationMode), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(cubeCoefs), + ::testing::ValuesIn(defaultAxes5D), + ::testing::ValuesIn(defaultScales5D)); + +// open when ref merged +// INSTANTIATE_TEST_CASE_P(smoke_InterpolateLinearOnnx5D_Layout_Test, InterpolateLayerCPUTest, +// ::testing::Combine( +// ::testing::Combine( +// interpolateCasesLinearOnnx5D, +// ::testing::ValuesIn(netPrecisions), +// ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), +// ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), +// ::testing::Values(InferenceEngine::Layout::ANY), +// ::testing::Values(InferenceEngine::Layout::ANY), +// ::testing::Values(std::vector({1, 21, 4, 10, 10})), +// ::testing::Values(std::vector({1, 21, 5, 15, 15})), +// ::testing::Values(CommonTestUtils::DEVICE_CPU)), +// ::testing::Values(std::map {}), +// ::testing::ValuesIn(filterCPUInfoForDevice5D())), +// InterpolateLayerCPUTest::getTestCaseName); + +// INSTANTIATE_TEST_CASE_P(smoke_InterpolateNN5D_Layout_Test, InterpolateLayerCPUTest, +// ::testing::Combine( +// ::testing::Combine( +// interpolateCasesNN5D, +// ::testing::ValuesIn(netPrecisions), +// ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), +// ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), +// ::testing::Values(InferenceEngine::Layout::ANY), +// ::testing::Values(InferenceEngine::Layout::ANY), +// ::testing::Values(std::vector({1, 21, 4, 10, 10})), +// ::testing::Values(std::vector({1, 21, 5, 15, 15})), +// ::testing::Values(CommonTestUtils::DEVICE_CPU)), +// ::testing::Values(std::map {}), +// ::testing::ValuesIn(filterCPUInfoForDevice5D())), +// InterpolateLayerCPUTest::getTestCaseName); + } // namespace } // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp index 6ec277bab3d628..6a74e5fc3d8b24 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp @@ -85,6 +85,10 @@ const std::vector> inputShapes_4D = { {2, 19, 5, 10}, {7, 32, 2, 8}, {5, 8, 3, 5}, + {1, 2, 7, 5}, 
+ {1, 4, 5, 5}, + {1, 7, 3, 5}, + {1, 15, 9, 5}, {4, 41, 6, 9} }; @@ -113,11 +117,13 @@ const std::vector epsilon = { std::vector inpOutPrc = {Precision::BF16, Precision::FP32}; std::vector cpuParams_4D = { + CPUSpecificParams({nhwc}, {nhwc}, {}, {}), CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), CPUSpecificParams({nchw}, {nchw}, {}, {}) }; std::vector cpuParams_5D = { + CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}), CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}) }; @@ -150,36 +156,6 @@ const auto Mvn4D = ::testing::Combine( INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D, MvnLayerCPUTest, Mvn4D, MvnLayerCPUTest::getTestCaseName); - -const auto MvnNHWC = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_4D), - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(false), - ::testing::Values(true), - ::testing::ValuesIn(epsilon), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(CPUSpecificParams({nhwc}, {nhwc}, {}, {})), - ::testing::ValuesIn(inpOutPrc), - ::testing::ValuesIn(inpOutPrc)); - -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_NHWC, MvnLayerCPUTest, MvnNHWC, MvnLayerCPUTest::getTestCaseName); - -const auto MvnNDHWC = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inputShapes_5D), - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(false), - ::testing::Values(true), - ::testing::ValuesIn(epsilon), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(CPUSpecificParams({ndhwc}, {ndhwc}, {}, {})), - ::testing::ValuesIn(inpOutPrc), - ::testing::ValuesIn(inpOutPrc)); - -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_NDHWC, MvnLayerCPUTest, MvnNDHWC, MvnLayerCPUTest::getTestCaseName); - - const auto Mvn5D = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inputShapes_5D), @@ -192,7 +168,6 @@ const auto Mvn5D = ::testing::Combine( ::testing::ValuesIn(inpOutPrc), ::testing::ValuesIn(inpOutPrc)); - INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D, MvnLayerCPUTest, Mvn5D, MvnLayerCPUTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp index b13b19e6e22279..ac386aae715b67 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp @@ -1,70 +1,67 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include +#include "test_utils/fusing_test_utils.hpp" #include "ngraph_functions/builders.hpp" -#include "test_utils/cpu_test_utils.hpp" +using namespace ngraph; using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace LayerTestsDefinitions; namespace CPULayerTestsDefinitions { -typedef std::tuple< - LayerTestsDefinitions::NormalizeL2LayerTestParams, - CPUSpecificParams> -NormalizeL2LayerCPUTestParamSet; +using NormalizeL2LayerCPUTestParamSet = std::tuple; class NormalizeL2LayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { public: static std::string getTestCaseName(testing::TestParamInfo obj) { - LayerTestsDefinitions::NormalizeL2LayerTestParams basicParamsSet; + NormalizeL2LayerTestParams basicParamsSet; 
CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = obj.param; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = obj.param; std::ostringstream result; - result << LayerTestsDefinitions::NormalizeL2LayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); - + result << NormalizeL2LayerTest::getTestCaseName(testing::TestParamInfo(basicParamsSet, 0)); result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); return result.str(); } + protected: void SetUp() override { - LayerTestsDefinitions::NormalizeL2LayerTestParams basicParamsSet; + NormalizeL2LayerTestParams basicParamsSet; CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = this->GetParam(); + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; std::vector axes; float eps; - ngraph::op::EpsMode eps_mode; - InferenceEngine::SizeVector inputShapes; - InferenceEngine::Precision netPrecision; + op::EpsMode eps_mode; + SizeVector inputShapes; + Precision netPrecision; std::tie(axes, eps, eps_mode, inputShapes, netPrecision, targetDevice) = basicParamsSet; + inPrc = outPrc = netPrecision; auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto param = ngraph::builder::makeParams(netPrc, {inputShapes}); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); - auto normalize_l2 = ngraph::builder::makeNormalizeL2(paramOuts[0], axes, eps, eps_mode); - - ngraph::ResultVector results{std::make_shared(normalize_l2)}; + auto params = builder::makeParams(netPrc, {inputShapes}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + auto normalize = builder::makeNormalizeL2(paramOuts[0], axes, eps, eps_mode); - if (Precision::BF16 == netPrecision) { - selectedType = "unknown_BF16"; - } else if (Precision::FP32 == netPrecision) { - selectedType = "unknown_FP32"; - } + function = makeNgraphFunction(netPrc, params, normalize, "Normalize"); + selectedType = "unknown_" + std::string(netPrecision.name()); threshold = 0.015f; - - normalize_l2->get_rt_info() = getCPUInfo(); - - function = std::make_shared(results, param, "Normalize"); + checkFusingPosition = false; } }; @@ -77,53 +74,123 @@ TEST_P(NormalizeL2LayerCPUTest, CompareWithRefs) { namespace { -const std::vector> axes = { - {}, - {1}, +/* ============= Common params ============= */ +const auto fusingMultiplySharedChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + SizeVector secondMultInShape(1, 1); + auto secondMultInput = builder::makeConstant(ngPrc, Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Multiply(SharedChannel)"}}), {"Multiply"}}; + +const auto fusingMultiplyNoSharedChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + SizeVector secondMultInShape(inpNode->get_shape().size(), 1); + secondMultInShape[1] = inpNode->get_shape()[1]; + auto secondMultInput = builder::makeConstant(ngPrc, Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, 
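        // Note: the two custom fusing patterns defined here differ only in the shape of the Multiply
        // constant: SharedChannel uses a single scalar value broadcast over all channels, while
        // NoSharedChannel uses a per-channel constant of shape {1, C, 1, ...}; both are expected to fuse
        // into the NormalizeL2 primitive as a "Multiply" post-operation.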
"Multiply(NoSharedChannel)"}}), {"Multiply"}}; + +std::vector fusingParamsSet { + emptyFusingSpec, + fusingMultiplySharedChannel, + fusingMultiplyNoSharedChannel }; -const std::vector eps = { 1e-4f }; -const std::vector epsMode = { - ngraph::op::EpsMode::ADD, - ngraph::op::EpsMode::MAX, +const float epsilon = 1e-4f; +const op::EpsMode epsMode = op::EpsMode::ADD; +const std::vector netPrecisions = { + Precision::FP32, + Precision::BF16 }; -std::vector cpuParams_4D = { - CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), - CPUSpecificParams({nhwc}, {nhwc}, {}, {}), - CPUSpecificParams({nchw}, {nchw}, {}, {}) +/* ============= 2D ============= */ +const std::vector> inputShape_2D = { + {2, 3}, + {2, 16}, + {3, 20} }; +const std::vector> axes_2D = { + {1} +}; -const std::vector netPrecisions = { - Precision::FP32, - Precision::BF16 +const auto normalizeParams_2D = ::testing::Combine(::testing::ValuesIn(axes_2D), + ::testing::Values(epsilon), + ::testing::Values(epsMode), + ::testing::ValuesIn(inputShape_2D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +const auto testParams_2D = ::testing::Combine(normalizeParams_2D, + ::testing::Values(CPUSpecificParams{}), + ::testing::ValuesIn(fusingParamsSet)); + +INSTANTIATE_TEST_CASE_P(smoke_2D, NormalizeL2LayerCPUTest, testParams_2D, NormalizeL2LayerCPUTest::getTestCaseName); + +/* ============= 3D ============= */ +const std::vector> inputShape_3D = { + {2, 3, 4}, + {2, 16, 6}, + {3, 20, 10} +}; + +const std::vector> axes_3D = { + {1, 2}, + {1} +}; + +const auto normalizeParams_3D = ::testing::Combine(::testing::ValuesIn(axes_3D), + ::testing::Values(epsilon), + ::testing::Values(epsMode), + ::testing::ValuesIn(inputShape_3D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +const auto testParams_3D = ::testing::Combine(normalizeParams_3D, + ::testing::Values(CPUSpecificParams{}), + ::testing::ValuesIn(fusingParamsSet)); + +INSTANTIATE_TEST_CASE_P(smoke_3D, NormalizeL2LayerCPUTest, testParams_3D, NormalizeL2LayerCPUTest::getTestCaseName); + +/* ============= 4D ============= */ +const std::vector> inputShape_4D = { + {2, 3, 4, 4}, + {2, 16, 7, 6}, + {3, 20, 2, 10} }; -const auto NormalizeL23D = testing::Combine( - testing::Combine( - testing::ValuesIn(axes), - testing::ValuesIn(eps), - testing::ValuesIn(epsMode), - testing::Values(std::vector{1, 32, 17}), - testing::ValuesIn(netPrecisions), - testing::Values(CommonTestUtils::DEVICE_CPU)), - testing::Values(emptyCPUSpec)); +const std::vector> axes_4D = { + {1, 2, 3}, + {1} +}; -INSTANTIATE_TEST_CASE_P(smoke_NormalizeL2CompareWithRefs_3D, NormalizeL2LayerCPUTest, NormalizeL23D, NormalizeL2LayerCPUTest::getTestCaseName); +std::vector getCPUSpecificParams() { + std::vector result; + result.push_back(CPUSpecificParams({nchw}, {nchw}, {}, {})); + if (with_cpu_x86_sse42()) { + result.push_back(CPUSpecificParams({nhwc}, {nhwc}, {}, {})); + if (with_cpu_x86_avx512f()) { + result.push_back(CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})); + } else if (with_cpu_x86_avx2()) { + result.push_back(CPUSpecificParams({nChw8c}, {nChw8c}, {}, {})); + } + } + return result; +} -const auto NormalizeL24D = testing::Combine( - testing::Combine( - testing::ValuesIn(axes), - testing::ValuesIn(eps), - testing::ValuesIn(epsMode), - testing::Values(std::vector{1, 3, 10, 5}), - testing::ValuesIn(netPrecisions), - testing::Values(CommonTestUtils::DEVICE_CPU)), - testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D))); +const auto 
normalizeParams_4D = ::testing::Combine(::testing::ValuesIn(axes_4D), + ::testing::Values(epsilon), + ::testing::Values(epsMode), + ::testing::ValuesIn(inputShape_4D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); -INSTANTIATE_TEST_CASE_P(smoke_NormalizeL2CompareWithRefs_4D, NormalizeL2LayerCPUTest, NormalizeL24D, NormalizeL2LayerCPUTest::getTestCaseName); +const auto testParams_4D = ::testing::Combine(normalizeParams_4D, + ::testing::ValuesIn(getCPUSpecificParams()), + ::testing::ValuesIn(fusingParamsSet)); +INSTANTIATE_TEST_CASE_P(smoke_4D, NormalizeL2LayerCPUTest, testParams_4D, NormalizeL2LayerCPUTest::getTestCaseName); } // namespace + } // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/split.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/split.cpp index 01520262399d3c..5ac9e3bc3f0c88 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/split.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/split.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -91,8 +91,11 @@ const auto planar_5D_ref = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref"}, "ref"}; const auto planar_4D = CPUSpecificParams{{nchw}, {nchw}, {}, "unknown"}; const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"}; -const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"}; -const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"}; +const auto perChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"}; +const auto perChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"}; + +const auto perChannelsToPlanar_4D = CPUSpecificParams{{nhwc}, {nchw}, {}, "ref"}; +const auto perChannelsToPlanar_5D = CPUSpecificParams{{ndhwc}, {ncdhw}, {}, "ref"}; const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"}; const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"}; @@ -114,6 +117,28 @@ const std::vector netPrecisions = { Precision::BF16 }; +INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(4), + ::testing::Values(1), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(std::vector({3, 28, 24, 9})), + ::testing::Values(std::vector({})), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(perChannelsToPlanar_4D)), + SplitLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Nspc2NcspSpecial, SplitLayerCPUTest, + ::testing::Combine( + ::testing::Values(3), + ::testing::Values(1), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(std::vector({3, 21, 24, 9, 15})), + ::testing::Values(std::vector({})), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(perChannelsToPlanar_5D)), + SplitLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest, ::testing::Combine( ::testing::Values(3), @@ -122,7 +147,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8inPlace, SplitLayerCPUTest, ::testing::Values(std::vector({3, 24, 24, 9})), ::testing::Values(std::vector({})), ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D)), + ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D)), 
SplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest, @@ -133,7 +158,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block8, SplitLayerCPUTest, ::testing::Values(std::vector({3, 24, 24, 9})), ::testing::Values(std::vector({})), ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(planar_4D, planar_4D_ref, planarChannels_4D, blocked8_4D_ref)), + ::testing::Values(planar_4D, planar_4D_ref, perChannels_4D, blocked8_4D_ref)), SplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Split4D_CPU_Block16inPlace, SplitLayerCPUTest, @@ -166,7 +191,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8inPlace, SplitLayerCPUTest, ::testing::Values(std::vector({3, 24, 24, 9, 15})), ::testing::Values(std::vector({})), ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D)), + ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D)), SplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest, @@ -177,7 +202,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block8, SplitLayerCPUTest, ::testing::Values(std::vector({3, 24, 24, 9, 15})), ::testing::Values(std::vector({})), ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(planar_5D, planar_5D_ref, planarChannels_5D, blocked8_5D_ref)), + ::testing::Values(planar_5D, planar_5D_ref, perChannels_5D, blocked8_5D_ref)), SplitLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Split5D_CPU_Block16inPlace, SplitLayerCPUTest, diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index eae5f1fade7bd1..755c95a7b488e0 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -77,6 +77,8 @@ std::string CPUTestsBase::impls2str(const std::vector &priority) { } void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { + if (nodeType.empty()) return; + ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; bool isNodeFound = false; InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp index 6b58c48544f94d..93fa14d9a99efa 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -56,10 +56,11 @@ void CpuTestWithFusing::CheckPluginRelatedResults(InferenceEngine::ExecutableNet auto layerType = getExecValue("layerType", rtInfo); if (layerType == nodeType) { auto originalLayersNames = getExecValue("originalLayersNames", rtInfo); - auto pos = originalLayersNames.find(nodeType); - ASSERT_TRUE(pos != std::string::npos) << "Node type " << nodeType << " has not been found!"; + std::string opFriendlyName = op->get_friendly_name(); + auto pos = originalLayersNames.find(opFriendlyName); + ASSERT_TRUE(pos != std::string::npos) << "Operation name " << op->get_friendly_name() << " has not been found in 
originalLayersNames!"; for (auto fusedOp : fusedOps) { - pos = originalLayersNames.find(fusedOp, pos); + pos = originalLayersNames.find(fusedOp, checkFusingPosition ? pos : 0); ASSERT_TRUE(pos != std::string::npos) << "Fused op " << fusedOp << " has not been found!"; } } diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index 469f867ff277ec..f7e22884ea8157 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -69,6 +69,7 @@ class CpuTestWithFusing : public CPUTestsBase { protected: std::shared_ptr postOpMgrPtr; std::vector fusedOps; + bool checkFusingPosition = true; }; /* FUSING PATTERNS */ @@ -126,6 +127,25 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared(ngraph::element::f32, newShape, {}, true); return std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Relu", "Add"}}; +const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { + auto shape = inpNode->get_shape(); + if (shape.size() == 1) + THROW_IE_EXCEPTION << "If shape.size() == 1 then Granularity can be PerTensor only"; + ngraph::Shape newShape(shape.size(), 1); + newShape[1] = shape[1]; + auto constNode = ngraph::builder::makeConstant(ngraph::element::f32, newShape, {}, true); + return std::make_shared(inpNode, constNode); + }, "Multiply(PerChannel)"}, + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { + auto shape = inpNode->get_shape(); + if (shape.size() == 1) + THROW_IE_EXCEPTION << "If shape.size() == 1 then Granularity can be PerTensor only"; + ngraph::Shape newShape(shape.size(), 1); + newShape[1] = shape[1]; + auto constNode = ngraph::builder::makeConstant(ngraph::element::f32, newShape, {}, true); + return std::make_shared(inpNode, constNode); + }, "Add(PerChannel)"}}), {"Add"} }; const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index 99ea78b165f7ad..ec2853f0eb298e 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -9,6 +9,7 @@ std::vector disabledTestPatterns() { return { + ".*TensorNamesTest\\.CheckAddOutput.*", // TODO: FIX BUG 31661 // TODO: support InferRequest in GNAPlugin ".*InferRequestTests\\.canRun3AsyncRequestsConsistentlyFromThreadsWithoutWait.*", diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/tensor_names.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/tensor_names.cpp new file mode 100644 index 00000000000000..0729a36e9a4dc6 --- /dev/null +++ 
b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/tensor_names.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/tensor_names.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { + INSTANTIATE_TEST_CASE_P(smoke_Check, TensorNamesTest, + ::testing::Values(CommonTestUtils::DEVICE_GNA), + TensorNamesTest::getTestCaseName); +} // namespace + diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp new file mode 100644 index 00000000000000..d9390e4cee6ce9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/configuration_tests/dynamic_batch.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "common_test_utils/test_constants.hpp" + +namespace ConfigurationTestsDefinitions { +namespace { +std::vector batch_sizes = { + 16, + 1, + 5, + 9, + 16 +}; + +std::map additional_config = { +}; +} // namespace + + +INSTANTIATE_TEST_CASE_P(smoke_DynamicBatchTest_async, DynamicBatchTest, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(batch_sizes), + ::testing::Values(true), + ::testing::Values(additional_config)), + DynamicBatchTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_DynamicBatchTest_sync, DynamicBatchTest, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(batch_sizes), + ::testing::Values(false), + ::testing::Values(additional_config)), + DynamicBatchTest::getTestCaseName); +} // namespace ConfigurationTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp new file mode 100644 index 00000000000000..a6344d5625e9ce --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_qdq_transformation.hpp" +#include "low_precision_transformations/convolution_with_incorrect_weights.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32, + // ngraph::element::f16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), + // LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false), +}; + +const std::vector params = { + // Actual: + // + // Constant + // | Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // FakeQuantize FakeQuantize + // |FP32 |FP32 + // | | + // Convert Constant Convert + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert Constant + // \FP32 /FP32 |FP32 /I8 + // \ / 
| / + // Subtract Constant Subtract Constant + // \FP32 /FP32 |FP32 /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant Constant + // \U8 /U8 /I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + {ngraph::element::f32}, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector{ 15.f }, ngraph::element::f32}, + { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, + + // Actual: + // + // Constant + // | Constant Constant Constant Constant + // | /FP32 /FP32 /FP32 /FP32 + // FakeQuantize FakeQuantize + // |FP32 |FP32 + // | | + // Convert Constant Convert + // |U8 |U8 |I8 + // | | | + // Convert Convert Convert + // \FP32 /FP32 |FP32 + // \ / | + // Subtract Constant | Constant + // \FP32 /FP32 | /FP32 + // \ / | / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // Parameter Constant + // \U8 /U8 + // \ / + // Subtract Constant + // \FP32 /I8 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + {ngraph::element::f32}, + {}, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector{ 15.f }, ngraph::element::f32}, + { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + {}, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, + + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |U8 |U8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{0.5f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, + + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + 
// \FP32 /FP32 + // \ / + // Convolution + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // Convolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{0.5f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.2f}, ngraph::element::f32, {}, false } + }, + "output_original", + "U8" + }, +}; + +const std::vector shapes = { + { 1, 3, 4, 4 }, + { 4, 3, 4, 4 } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionQDqTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + ConvolutionQDqTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp index e1832b85f5aca9..9c9e61e431f52b 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -18,7 +18,7 @@ const std::vector netPrecisions = { const std::vector trasformationParamValues = { LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), - LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false), + // LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false), }; const std::vector params = { @@ -26,19 +26,25 @@ const std::vector params { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, false, {}, - false + false, + "output", + "" }, { {}, false, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } }, - false + false, + "output", + "" }, { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } }, false, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } }, - false + false, + "output_original", + "U8" }, { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -12.75f }, { 6.375f } }, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp index 9bbd0465de23c0..417455cdf04458 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp +++ 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_and_two_output_branches_with_convolution.cpp @@ -28,7 +28,6 @@ const std::vector testValues = } }; -// TODO: add something to avoid cleanup and enable INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeAndTwoOutputBranchesWithConvolutionTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index f627ae13182c0c..300f9ad292c2ab 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -33,7 +33,6 @@ const std::vector fakeQuantizeOnD // { 256ul, { 1ul }, { -1.28f} , { 1.27f } } }; -// TODO: add something to avoid cleanup and enable INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp new file mode 100644 index 00000000000000..9d3b43c2583128 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp" +#include "common_test_utils/test_constants.hpp" +#include "lpt_ngraph_functions/fake_quantize_function.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::pass::low_precision; + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8AndI8().setUpdatePrecisions(true), + // LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8AndI8().setUpdatePrecisions(false), +}; + +const std::vector fakeQuantizeOnDataValues = { + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {127.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "I8" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + {}, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.3f}, ngraph::element::f32, {}, false } + }, 
+ {}, + "I8" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { {127.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "I8" + }, + { + { 256ul, {{ 1, 1, 1, 1 }}, { 0.f }, { 25.5f }, { -128.f }, { 127.f }, ngraph::element::f32 }, + { ngraph::element::i8, false }, + { + { ngraph::element::f32, false }, + { {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + {{5.f}, ngraph::element::i8}, + {}, + {}, + { + { ngraph::element::f32, false }, + { }, + { {0.3f}, ngraph::element::f32, {}, false } + }, + {}, + "I8" + } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, FakeQuantizeWithNotOptimalTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fakeQuantizeOnDataValues)), + FakeQuantizeWithNotOptimalTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp index b577f406ad4f5b..83a481fef69691 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp @@ -12,14 +12,40 @@ using namespace InferenceEngine::details; namespace { const std::vector precisions = { ngraph::element::f32 }; +//transpose_a = false, transpose_b = true std::vector testValues = { + // 3D with different values { - { 1, 32 }, - { 256ul, ngraph::Shape({}), {0.f}, {25.5f}, {0.f}, {25.5f} }, - { 32, 10 }, - std::vector(32 * 10, 1.f), - { 256ul, ngraph::Shape({}), {-12.8f}, {12.7f}, {-12.8f}, {12.7f} } - } + { 2, 3, 4 }, + { 256ul, {{1, 1, 1}, {1, 1, 1}, {1, 3, 1}, {1, 3, 1}}, {0.f}, {255.f}, {0.f, 0.f, 0.f}, {255.f, 25.5f, 255.f} }, + { 2, 4 }, + std::vector(4 * 2, 2.f), + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, + }, + // 3D with different values + { + { 1, 3, 4 }, + { 256ul, {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}, {-10.5f}, {4.5f}, {-10.5f}, {4.5f} }, + { 2, 4 }, + std::vector(4 * 2, 2.f), + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, + }, + // 3D with the same values + { + { 1, 3, 4 }, + { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {255.f}, {0.f}, {25.5f} }, + { 4, 4 }, + std::vector(4 * 4, 2.f), + { 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} }, + }, + // 2D with subtract on activations + { + { 2, 3 }, + { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-10.f}, {5.f}, {-10.f, -5.f}, {5.f, 5.f} }, + { 2, 3 }, + std::vector{1, 2, 3, 4, 5, 6}, + { 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} }, + }, }; INSTANTIATE_TEST_CASE_P(smoke_LPT, MatMulWithConstantTransformation, diff 
--git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp index 5eb1dec35c5008..b1baac53569682 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp @@ -23,7 +23,8 @@ std::vector testValues = { { { 256ul, ngraph::Shape({}), {-12.8f / 2.f}, {12.7f}, {-12.8f / 2.f}, {12.7f} }, true } }; -INSTANTIATE_TEST_CASE_P(LPT, PReluTransformation, +// PRelu in low precision is not supported in GPU +INSTANTIATE_TEST_CASE_P(DISABLED_LPT, PReluTransformation, ::testing::Combine( ::testing::ValuesIn(precisions), ::testing::Values(ngraph::Shape({ 1, 3, 16, 16 })), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp index 97af7bed3732a5..086c5115734d80 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp new file mode 100644 index 00000000000000..c929ef6834448a --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_elements_update.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "single_layer_tests/scatter_elements_update.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::opset3; + +namespace { +// map> +std::map, std::map, std::vector>> axesShapeInShape { + {{10, 12, 15}, {{{1, 2, 4}, {0, 1, 2}}, {{2, 2, 2}, {-1, -2, -3}}}}, + {{15, 9, 8, 12}, {{{1, 2, 2, 2}, {0, 1, 2, 3}}, {{1, 2, 1, 4}, {-1, -2, -3, -4}}}}, + {{9, 9, 8, 8, 11, 10}, {{{1, 2, 1, 2, 1, 2}, {5, -3}}}}, +}; + +// index value should not be random data +const std::vector> idxValue = { + {1, 0, 4, 6, 2, 3, 7, 5} +}; + +const std::vector inputPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::I32, +}; + +const std::vector idxPrecisions = { + InferenceEngine::Precision::I32, + InferenceEngine::Precision::I64, +}; + +const auto ScatterEltUpdateCases = ::testing::Combine( + ::testing::ValuesIn(ScatterElementsUpdateLayerTest::combineShapes(axesShapeInShape)), + ::testing::ValuesIn(idxValue), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(idxPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_ScatterEltsUpdate, ScatterElementsUpdateLayerTest, + 
ScatterEltUpdateCases, ScatterElementsUpdateLayerTest::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp index 915b17194385eb..690ba1b81ffd42 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp @@ -25,8 +25,9 @@ const std::vector idxPrecisions = { // map> std::map, std::map, std::vector>> axesShapeInShape { - {{10, 16, 12, 15}, {{{2, 4}, {0, 1, 2, 3}}, {{8}, {-1, -2, -3, -4}}}}, - {{10, 9, 10, 9, 10}, {{{8}, {-3, -1, 0, 2, 4}}, {{4, 2}, {-2, 2}}}}, + {{10, 16, 12, 15}, {{{2, 2, 2}, {0, 1, 2, 3}}, {{2, 4}, {0, 1, 2, 3}}, {{8}, {0, 1, 2, 3}}}}, + {{10, 9, 10, 9, 10}, {{{8}, {0, 1, 2, 3, 4}}, {{4, 2}, {0, 1, 2, 3, 4}}}}, + {{10, 9, 10, 9, 10, 12}, {{{8}, {0, 1, 2, 3, 4, 5}}}}, }; //indices should not be random value const std::vector> idxValue = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 619efbe767d818..bbbe4dc1d8a5e0 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -19,8 +19,11 @@ std::vector disabledTestPatterns() { R"(.*EltwiseLayerTest.*eltwiseOpType=Pow.*netPRC=I64.*)", R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)", // TODO: Issue: 43794 - R"(.*(PreprocessTest).*(SetScalePreProcess).*)", - R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcess).*)", + R"(.*(PreprocessTest).*(SetScalePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(SetScalePreProcessGetBlob).*)", + R"(.*(PreprocessTest).*(SetMeanValuePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(SetMeanImagePreProcessSetBlob).*)", + R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcessGetBlob).*)", // TODO: Issue: 41467 -- "unsupported element type f16 op Convert" R"(.*(ConvertLayerTest).*targetPRC=FP16.*)", // TODO: Issue: 41462 @@ -48,9 +51,10 @@ std::vector disabledTestPatterns() { R"(.*(LSTMSequence).*mode=CONVERT_TO_TI_RAND_SEQ_LEN.*)", R"(.*(smoke_DetectionOutput3In).*)", R"(.*(smoke_DetectionOutput5In).*)", - R"(.*(ScatterUpdateLayerTest).*)", // INT8 StridedSlice not supported R"(.*(LPT/StridedSliceTransformation).*)", + // TODO: Issue: 47219 + R"(.*DynamicBatchTest.*)", }; } diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/tensor_names.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/tensor_names.cpp new file mode 100644 index 00000000000000..b5258c33fd5e89 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/tensor_names.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/tensor_names.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { + INSTANTIATE_TEST_CASE_P(smoke_Check, TensorNamesTest, + ::testing::Values(CommonTestUtils::DEVICE_GPU), + 
TensorNamesTest::getTestCaseName); +} // namespace + + diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_broadcast.cpp b/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_broadcast.cpp index 7c7996da502401..80b76464c8ad91 100644 --- a/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_broadcast.cpp +++ b/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_broadcast.cpp @@ -211,7 +211,8 @@ class DynamicToStaticShapeBroadcastBidirectionalTests : public CommonTestUtils:: } const auto broadcast = std::make_shared(broadcastInput, shapeOfNode, ngraph::op::BroadcastType::BIDIRECTIONAL); - + // tests are invalid -- output shape of broadcast gets fully deduced and transformations stop working for this particular graph + broadcast->set_output_type(0, broadcast->get_output_element_type(0), ngraph::PartialShape::dynamic(broadcast->get_output_partial_shape(0).rank())); auto function = std::make_shared( ngraph::NodeVector{broadcast}, params, diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp index d3a5ca903eac8d..3a97d5d35ccd4b 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp @@ -25,9 +25,6 @@ std::vector disabledTestPatterns() { R"(.*TopKLayerTest.*mode=min.*sort=index.*)", // TODO: Issue: 40961 R"(.*(ConstantResultSubgraphTest).*)", - // TODO: Issue: 43795 - R"(.*(PreprocessTest).*(SetMeanValuePreProcess).*)", - R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcess).*)", // TODO: Issue: 42828 R"(.*DSR_NonMaxSuppression.*NBoxes=(5|20|200).*)", // TODO: Issue: 42721 diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/subgraph_tests/tensor_names.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/subgraph_tests/tensor_names.cpp new file mode 100644 index 00000000000000..93e978ab427b07 --- /dev/null +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/subgraph_tests/tensor_names.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/tensor_names.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { + INSTANTIATE_TEST_CASE_P(smoke_Check, TensorNamesTest, + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), + TensorNamesTest::getTestCaseName); +} // namespace + + + diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/invalid_cases/proposal.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/invalid_cases/proposal.hpp index 880f9d1fda665e..5ca19336b489bf 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/invalid_cases/proposal.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/invalid_cases/proposal.hpp @@ -23,6 +23,7 @@ class ProposalBehTest protected: void SetUp() override; void Validate() override {}; + void Run() override; const LayerTestsDefinitions::normalize_type normalize = true; const LayerTestsDefinitions::feat_stride_type feat_stride = 1; diff --git 
a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp index 3ca9ab37c2cd08..ec258bfabb9059 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -62,7 +62,7 @@ TEST_P(PreprocessTest, SetPreProcessToInferRequest) { } } -TEST_P(PreprocessTest, SetMeanImagePreProcess) { +TEST_P(PreprocessTest, SetMeanImagePreProcessGetBlob) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr ngraph; @@ -129,7 +129,77 @@ TEST_P(PreprocessTest, SetMeanImagePreProcess) { } } -TEST_P(PreprocessTest, SetMeanValuePreProcess) { +TEST_P(PreprocessTest, SetMeanImagePreProcessSetBlob) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::shared_ptr ngraph; + { + ngraph::PartialShape shape({1, 3, 10, 10}); + ngraph::element::Type type(ngraph::element::Type_t::f32); + auto param = std::make_shared(type, shape); + param->set_friendly_name("param"); + auto relu = std::make_shared(param); + relu->set_friendly_name("relu"); + auto result = std::make_shared(relu); + result->set_friendly_name("result"); + + ngraph::ParameterVector params = {param}; + ngraph::ResultVector results = {result}; + + ngraph = std::make_shared(results, params); + } + + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(ngraph); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.init(3); + for (size_t i = 0; i < 3; i++) { + preProcess[i]->meanData = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, + {10, 10}, + InferenceEngine::Layout::HW)); + preProcess[i]->meanData->allocate(); + auto lockedMem = preProcess[i]->meanData->buffer(); + auto* data = lockedMem.as(); + for (size_t j = 0; j < 100; j++) { + data[j] = 0; + data[j] -= i * 100 + j; + } + } + preProcess.setVariant(InferenceEngine::MEAN_IMAGE); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + + auto inBlob = make_blob_with_precision(cnnNet.getInputsInfo().begin()->second->getTensorDesc()); + inBlob->allocate(); + req.SetBlob("param", inBlob); + + // Fill input + { + auto locketMem = inBlob->buffer(); + auto *inData = locketMem.as(); + for (size_t i = 0; i < inBlob->size(); i++) + inData[i] = i; + } + + req.Infer(); + + // Check output + auto outBlob = req.GetBlob(cnnNet.getOutputsInfo().begin()->first); + { + auto inMem = inBlob->cbuffer(); + const auto* inData = inMem.as(); + auto outMem = outBlob->cbuffer(); + const auto* outData = outMem.as(); + ASSERT_EQ(inBlob->size(), outBlob->size()); + for (size_t i = 0; i < inBlob->size(); i++) + ASSERT_EQ(inData[i] + inData[i], outData[i]); + } +} + +TEST_P(PreprocessTest, SetMeanValuePreProcessGetBlob) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr ngraph; @@ -190,7 +260,72 @@ TEST_P(PreprocessTest, SetMeanValuePreProcess) { } } -TEST_P(PreprocessTest, ReverseInputChannelsPreProcess) { +TEST_P(PreprocessTest, SetMeanValuePreProcessSetBlob) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + 
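+ // Unlike the pre-existing *GetBlob variants, the new *SetBlob tests allocate the input blob
+ // from the network's TensorDesc themselves and hand it to the request via req.SetBlob("param", inBlob)
+ // before filling it, so the mean/scale preprocessing has to be applied to a user-provided blob
+ // rather than to the request-owned blob returned by GetBlob.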
SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::shared_ptr ngraph; + { + ngraph::PartialShape shape({1, 3, 10, 10}); + ngraph::element::Type type(ngraph::element::Type_t::f32); + auto param = std::make_shared(type, shape); + param->set_friendly_name("param"); + auto relu = std::make_shared(param); + relu->set_friendly_name("relu"); + auto result = std::make_shared(relu); + result->set_friendly_name("result"); + + ngraph::ParameterVector params = {param}; + ngraph::ResultVector results = {result}; + + ngraph = std::make_shared(results, params); + } + + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(ngraph); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.init(3); + preProcess[0]->meanValue = -5; + preProcess[1]->meanValue = -5; + preProcess[2]->meanValue = -5; + preProcess[0]->stdScale = 1; + preProcess[1]->stdScale = 1; + preProcess[2]->stdScale = 1; + preProcess.setVariant(InferenceEngine::MEAN_VALUE); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + + auto inBlob = make_blob_with_precision(cnnNet.getInputsInfo().begin()->second->getTensorDesc()); + inBlob->allocate(); + req.SetBlob("param", inBlob); + + // Fill input + { + auto locketMem = inBlob->buffer(); + auto *inData = locketMem.as(); + for (size_t i = 0; i < inBlob->size(); i++) + inData[i] = i; + } + + req.Infer(); + + // Check output + auto outBlob = req.GetBlob(cnnNet.getOutputsInfo().begin()->first); + { + auto inMem = inBlob->cbuffer(); + const auto* inData = inMem.as(); + auto outMem = outBlob->cbuffer(); + const auto* outData = outMem.as(); + ASSERT_EQ(inBlob->size(), outBlob->size()); + for (size_t i = 0; i < inBlob->size(); i++) + ASSERT_EQ(inData[i]+5, outData[i]); + } +} + + +TEST_P(PreprocessTest, ReverseInputChannelsPreProcessGetBlob) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr ngraph; @@ -253,7 +388,74 @@ TEST_P(PreprocessTest, ReverseInputChannelsPreProcess) { } } -TEST_P(PreprocessTest, SetScalePreProcess) { + +TEST_P(PreprocessTest, ReverseInputChannelsPreProcessSetBlob) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::shared_ptr ngraph; + { + ngraph::PartialShape shape({1, 3, 10, 10}); + ngraph::element::Type type(ngraph::element::Type_t::f32); + auto param = std::make_shared(type, shape); + param->set_friendly_name("param"); + auto relu = std::make_shared(param); + relu->set_friendly_name("relu"); + auto result = std::make_shared(relu); + result->set_friendly_name("result"); + + ngraph::ParameterVector params = {param}; + ngraph::ResultVector results = {result}; + + ngraph = std::make_shared(results, params); + } + + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(ngraph); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.setColorFormat(InferenceEngine::ColorFormat::RGB); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + + auto inBlob = make_blob_with_precision(cnnNet.getInputsInfo().begin()->second->getTensorDesc()); + inBlob->allocate(); + req.SetBlob("param", inBlob); + + // Fill input + { + auto locketMem = inBlob->buffer(); + auto *inData = 
locketMem.as(); + for (size_t i = 0; i < inBlob->size(); i++) + inData[i] = i; + } + + req.Infer(); + + // Check output + auto outBlob = req.GetBlob(cnnNet.getOutputsInfo().begin()->first); + { + auto inMem = inBlob->cbuffer(); + const auto* inData = inMem.as(); + auto outMem = outBlob->cbuffer(); + const auto* outData = outMem.as(); + ASSERT_EQ(inBlob->size(), outBlob->size()); + for (size_t i = 0; i < 3; i++) + for (size_t j = 0; j < 100; j++) { + // BGR to RGB + if (!i) { + ASSERT_EQ(inData[j], outData[200 + j]); + } else if (i == j) { + ASSERT_EQ(inData[100 + j], outData[100 + j]); + } else { + ASSERT_EQ(inData[200 + j], outData[j]); + } + } + } +} + +TEST_P(PreprocessTest, SetScalePreProcessGetBlob) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr ngraph; @@ -314,4 +516,69 @@ TEST_P(PreprocessTest, SetScalePreProcess) { } } + +TEST_P(PreprocessTest, SetScalePreProcessSetBlob) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::shared_ptr ngraph; + { + ngraph::PartialShape shape({1, 3, 10, 10}); + ngraph::element::Type type(ngraph::element::Type_t::f32); + auto param = std::make_shared(type, shape); + param->set_friendly_name("param"); + auto relu = std::make_shared(param); + relu->set_friendly_name("relu"); + auto result = std::make_shared(relu); + result->set_friendly_name("result"); + + ngraph::ParameterVector params = {param}; + ngraph::ResultVector results = {result}; + + ngraph = std::make_shared(results, params); + } + + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(ngraph); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.init(3); + preProcess[0]->stdScale = 2; + preProcess[1]->stdScale = 2; + preProcess[2]->stdScale = 2; + preProcess[0]->meanValue = 0; + preProcess[1]->meanValue = 0; + preProcess[2]->meanValue = 0; + preProcess.setVariant(InferenceEngine::MEAN_VALUE); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + + auto inBlob = make_blob_with_precision(cnnNet.getInputsInfo().begin()->second->getTensorDesc()); + inBlob->allocate(); + req.SetBlob("param", inBlob); + + // Fill input + { + auto locketMem = inBlob->buffer(); + auto *inData = locketMem.as(); + for (size_t i = 0; i < inBlob->size(); i++) + inData[i] = i; + } + + req.Infer(); + + // Check output + auto outBlob = req.GetBlob(cnnNet.getOutputsInfo().begin()->first); + { + auto inMem = inBlob->cbuffer(); + const auto* inData = inMem.as(); + auto outMem = outBlob->cbuffer(); + const auto* outData = outMem.as(); + ASSERT_EQ(inBlob->size(), outBlob->size()); + for (size_t i = 0; i < inBlob->size(); i++) + ASSERT_EQ(inData[i]*2, outData[i]); + } +} + } // namespace BehaviorTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/configuration_tests/dynamic_batch.hpp b/inference-engine/tests/functional/plugin/shared/include/configuration_tests/dynamic_batch.hpp new file mode 100644 index 00000000000000..973ae5b6e4bad9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/configuration_tests/dynamic_batch.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include 
"shared_test_classes/base/layer_test_utils.hpp" + +namespace ConfigurationTestsDefinitions { +typedef std::tuple< + std::string, // Device + InferenceEngine::Precision, // Network precision + std::vector, // Batch sizes + bool, // Asynchronous execution + std::map // Additional configuration +> dynamicBatchTestParams; + +class DynamicBatchTest : public LayerTestsUtils::LayerTestsCommon, + public testing::WithParamInterface { +private: + bool run_async = false; + size_t max_batch_size = 0; + std::vector batch_sizes; + std::vector> reference_inputs; + std::vector> scaled_inputs; + std::vector>> reference_outputs; + std::vector> actual_outputs; + std::vector infer_requests; +protected: + void SetUp() override; + void Run() override; + + void LoadNetwork(); + void Infer() override; + void Validate() override; +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); +}; +} // namespace ConfigurationTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/execution_graph_tests/remove_parameter.hpp b/inference-engine/tests/functional/plugin/shared/include/execution_graph_tests/remove_parameter.hpp new file mode 100644 index 00000000000000..e2b19c5ef07611 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/execution_graph_tests/remove_parameter.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" + +namespace ExecutionGraphTests { + +class ExecGraphRemoveParameterNode + : public testing::TestWithParam { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +}; + +} // namespace ExecutionGraphTests diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_qdq_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_qdq_transformation.hpp new file mode 100644 index 00000000000000..5eb91f469bf774 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_qdq_transformation.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" +#include "lpt_ngraph_functions/common/constant.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" + +namespace LayerTestsDefinitions { + +class ConvolutionQDqTransformationParam { +public: + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeOnData; + ngraph::builder::subgraph::DequantizationOperations::Convert convertOnData; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnData; + + ngraph::builder::subgraph::Constant constantOnWeights; + ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::builder::subgraph::DequantizationOperations::Convert convertOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; + + std::string layerName; + std::string expectedKernelType; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConvolutionQDqTransformationParam& data) { + return out << "_" << + data.fakeQuantizeOnData << "_" << + data.convertOnData << "_" << + data.dequantizationOnData << "_" 
<< + + data.constantOnWeights << "_" << + data.fakeQuantizeOnWeights << "_" << + data.convertOnWeights << "_" << + data.dequantizationOnWeights << + + data.layerName << "_" << + data.expectedKernelType; +} + +typedef std::tuple< + ngraph::element::Type, + ngraph::Shape, + std::string, + ngraph::pass::low_precision::LayerTransformation::Params, + ConvolutionQDqTransformationParam +> ConvolutionQDqTransformationParams; + +class ConvolutionQDqTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + + void Run() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp new file mode 100644 index 00000000000000..369f8e172e280e --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp @@ -0,0 +1,69 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "lpt_ngraph_functions/fake_quantize_function.hpp" +#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" + +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/constant.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" + +namespace LayerTestsDefinitions { + +class FakeQuantizeWithNotOptimalTransformationTestValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fqOnData; + ngraph::builder::subgraph::DequantizationOperations::Convert convertOnData; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnData; + + ngraph::builder::subgraph::Constant constantOnWeights; + ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights; + ngraph::builder::subgraph::DequantizationOperations::Convert convertOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; + + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + std::string expectedPrecision; +}; + +inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeWithNotOptimalTransformationTestValues& data) { + return out << "_" << + data.fqOnData << "_" << + data.convertOnData << "_" << + data.dequantizationOnData << "_" << + + data.constantOnWeights << "_" << + data.fqOnWeights << "_" << + data.convertOnWeights << "_" << + data.dequantizationOnWeights << + + data.dequantizationAfter << "_" << + data.expectedPrecision; +} + +// ngraph::builder::subgraph::FakeQuantizeOnData +typedef std::tuple< + InferenceEngine::Precision, + InferenceEngine::SizeVector, + std::string, + ngraph::pass::low_precision::LayerTransformation::Params, + FakeQuantizeWithNotOptimalTransformationTestValues> FakeQuantizeTransformationParams; + +class FakeQuantizeWithNotOptimalTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + 
+protected: + void SetUp() override; + void Run() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp index 1ac5bc2cdc8078..15e849fda6f83e 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp @@ -19,6 +19,8 @@ class MatMulTransformationTestValues { ngraph::builder::subgraph::FakeQuantizeOnData fqOnData1; ngraph::Shape inputShape2; ngraph::builder::subgraph::FakeQuantizeOnData fqOnData2; + std::string expectedKernelName; + std::string expectedRuntimePrecision; }; typedef std::tuple< @@ -36,6 +38,7 @@ class MatMulTransformation : protected: void SetUp() override; + void Run() override; private: void validate(); diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_constant_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_constant_transformation.hpp index 98493955c1a7f9..8e858673c8def2 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_constant_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_constant_transformation.hpp @@ -17,10 +17,10 @@ namespace LayerTestsDefinitions { class MatMulWithConstantTransformationTestValues { public: ngraph::Shape inputShape; - ngraph::builder::subgraph::FakeQuantizeOnData fqOnData; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fqOnData; ngraph::Shape weightsConstShape; std::vector weightsConstValues; - ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fqOnWeights; std::string layerName; std::string expectedKernelType; }; diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/ctc_greedy_decoder_seq_len.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/ctc_greedy_decoder_seq_len.hpp new file mode 100644 index 00000000000000..28ff5a9ac1f7cf --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/ctc_greedy_decoder_seq_len.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/ctc_greedy_decoder_seq_len.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(CTCGreedyDecoderSeqLenLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/mvn.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/mvn.hpp index 24cbc6169185ed..3366dc1f9ac2f4 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/mvn.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/mvn.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -12,4 +12,8 @@ TEST_P(MvnLayerTest, CompareWithRefs) { Run(); }; 
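The mat_mul_transformation.hpp hunk above adds expectedKernelName / expectedRuntimePrecision to MatMulTransformationTestValues and declares a Run() override, whose definition is not visible in this part of the diff. One plausible shape for that override, mirroring the runtime-precision check that ConvolutionQDqTransformation::Run() performs later in this patch, is the sketch below; the tuple index and field usage are assumptions, not part of the patch.

// Sketch only: assumes the test values sit in the last tuple position and reuses the
// getRuntimePrecision() helper shown in ConvolutionQDqTransformation::Run() later in this diff.
void MatMulTransformation::Run() {
    LayerTestsCommon::Run();

    const auto testValues = std::get<3>(GetParam());
    const auto actualPrecision = getRuntimePrecision(testValues.expectedKernelName);
    EXPECT_EQ(actualPrecision, testValues.expectedRuntimePrecision);
}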
+TEST_P(Mvn6LayerTest, CompareWithRefs) { + Run(); +}; + } // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/tensor_names.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/tensor_names.hpp new file mode 100644 index 00000000000000..d545e47705e445 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/tensor_names.hpp @@ -0,0 +1,166 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/tensor_names.hpp" +#include + +namespace SubgraphTestsDefinitions { + +TEST_P(TensorNamesTest, CheckTensorNames) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + ConfigureNetwork(); + + auto inputs = cnnNetwork.getInputsInfo(); + auto outputs = cnnNetwork.getOutputsInfo(); + std::unordered_set inNames; + for (const auto& in : inputs) + inNames.emplace(in.first); + std::unordered_set outNames; + for (const auto& out : outputs) + outNames.emplace(out.first); + + for (const auto& param : function->get_parameters()) { + ASSERT_TRUE(inNames.count(cnnNetwork.getOVNameForOperation(param->get_friendly_name()))); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_TRUE(inNames.count(cnnNetwork.getOVNameForTensor(name))); + } + + for (const auto& result : function->get_results()) { + ASSERT_TRUE(outNames.count(cnnNetwork.getOVNameForOperation(result->get_friendly_name()))); + for (const auto& name : result->input_value(0).get_tensor().get_names()) + ASSERT_TRUE(outNames.count(cnnNetwork.getOVNameForTensor(name))); + } + + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + inferRequest = executableNetwork.CreateInferRequest(); + + for (const auto& param : function->get_parameters()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForOperation(param->get_friendly_name()))); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForTensor(name))); + } + + for (const auto& result : function->get_results()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForOperation(result->get_friendly_name()))); + for (const auto& name : result->get_input_tensor(0).get_names()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForTensor(name))); + } + } +} + +TEST_P(TensorNamesTest, CheckTensorNamesAfterClone) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::CNNNetwork clonedNet(static_cast(cnnNetwork)); + ConfigureNetwork(); + + auto inputs = clonedNet.getInputsInfo(); + auto outputs = clonedNet.getOutputsInfo(); + std::unordered_set inNames; + for (const auto& in : inputs) + inNames.emplace(in.first); + std::unordered_set outNames; + for (const auto& out : outputs) + outNames.emplace(out.first); + + for (const auto& param : function->get_parameters()) { + ASSERT_TRUE(inNames.count(clonedNet.getOVNameForOperation(param->get_friendly_name()))); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_TRUE(inNames.count(clonedNet.getOVNameForTensor(name))); + } + + for (const auto& result : function->get_results()) { + ASSERT_TRUE(outNames.count(clonedNet.getOVNameForOperation(result->get_friendly_name()))); + + for (const auto& name : result->get_input_tensor(0).get_names()) { + ASSERT_TRUE(outNames.count(clonedNet.getOVNameForTensor(name))); + } + } + + executableNetwork = 
core->LoadNetwork(clonedNet, targetDevice, configuration); + inferRequest = executableNetwork.CreateInferRequest(); + + for (const auto& param : function->get_parameters()) { + ASSERT_NO_THROW(inferRequest.GetBlob(clonedNet.getOVNameForOperation(param->get_friendly_name()))); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_NO_THROW(inferRequest.GetBlob(clonedNet.getOVNameForTensor(name))); + } + + for (const auto& result : function->get_results()) { + ASSERT_NO_THROW(inferRequest.GetBlob(clonedNet.getOVNameForOperation(result->get_friendly_name()))); + for (const auto& name : result->input_value(0).get_tensor().get_names()) + ASSERT_NO_THROW(inferRequest.GetBlob(clonedNet.getOVNameForTensor(name))); + } +} + +TEST_P(TensorNamesTest, CheckAddOutput) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + cnnNetwork = InferenceEngine::CNNNetwork{function}; + ConfigureNetwork(); + + auto inputs = cnnNetwork.getInputsInfo(); + auto outputs = cnnNetwork.getOutputsInfo(); + std::unordered_set inNames; + for (const auto& in : inputs) + inNames.emplace(in.first); + std::unordered_set outNames; + for (const auto& out : outputs) + outNames.emplace(out.first); + + ASSERT_EQ(1, inputs.size()); + ASSERT_EQ(1, outputs.size()); + ASSERT_EQ(1, function->get_results().size()); + + // Check that relu_prev doesn't exist in output and input maps + ASSERT_THROW(cnnNetwork.getOVNameForOperation("relu_prev"), InferenceEngine::NotFound); + for (const std::string& tensor_name : {"relu,prev_t", "identity_prev_t"}) { + ASSERT_THROW(cnnNetwork.getOVNameForOperation(tensor_name), InferenceEngine::NotFound); + } + + // Add relu_prev as output + cnnNetwork.addOutput("relu_prev"); + + inputs = cnnNetwork.getInputsInfo(); + outputs = cnnNetwork.getOutputsInfo(); + inNames.clear(); + for (const auto& in : inputs) + inNames.emplace(in.first); + outNames.clear(); + for (const auto& out : outputs) + outNames.emplace(out.first); + + ASSERT_EQ(1, inputs.size()); + ASSERT_EQ(2, outputs.size()); + ASSERT_EQ(2, function->get_results().size()); + + // Check that relu_prev exists in output map + ASSERT_FALSE(inNames.count(cnnNetwork.getOVNameForOperation("relu_prev"))); + for (const std::string& tensor_name : {"relu,prev_t", "identity_prev_t"}) { + ASSERT_FALSE(inNames.count(cnnNetwork.getOVNameForTensor(tensor_name))); + } + ASSERT_TRUE(outNames.count(cnnNetwork.getOVNameForOperation("relu_prev"))); + for (const std::string& tensor_name : {"relu,prev_t", "identity_prev_t"}) { + ASSERT_TRUE(outNames.count(cnnNetwork.getOVNameForTensor(tensor_name))); + } + + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + inferRequest = executableNetwork.CreateInferRequest(); + + for (const auto& param : cnnNetwork.getFunction()->get_parameters()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForOperation(param->get_friendly_name()))); + for (const auto& name : param->get_output_tensor(0).get_names()) + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForTensor(name))); + } + + for (const auto& result : cnnNetwork.getFunction()->get_results()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForOperation(result->get_friendly_name()))); + for (const auto& name : result->get_input_tensor(0).get_names()) { + ASSERT_NO_THROW(inferRequest.GetBlob(cnnNetwork.getOVNameForTensor(name))); + } + } +} + +} // namespace SubgraphTestsDefinitions + diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp 
b/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp index 3ba5ed9a5316c9..2292a8b6a293a9 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp @@ -93,6 +93,11 @@ void ProposalBehTest::SetUp() { function = std::make_shared(results, params, "proposal"); } +void ProposalBehTest::Run() { + LoadNetwork(); + Infer(); +} + TEST_P(ProposalBehTest, CompareWithRefs) { ASSERT_THROW(Run(), InferenceEngine::details::InferenceEngineException); } diff --git a/inference-engine/tests/functional/plugin/shared/src/configuration_tests/dynamic_batch.cpp b/inference-engine/tests/functional/plugin/shared/src/configuration_tests/dynamic_batch.cpp new file mode 100644 index 00000000000000..f1144bab8af0d5 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/configuration_tests/dynamic_batch.cpp @@ -0,0 +1,164 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "ie_transformations.hpp" +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/precision_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/skip_tests_config.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include +#include "transformations/control_flow/unroll_tensor_iterator.hpp" +#include "configuration_tests/dynamic_batch.hpp" + +#include "ngraph_functions/subgraph_builders.hpp" + +namespace ConfigurationTestsDefinitions { + + std::string DynamicBatchTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::string targetDevice; + InferenceEngine::Precision netPrecision; + std::vector batchSizes; + bool runAsync; + std::map config; + std::tie(targetDevice, netPrecision, batchSizes, runAsync, config) = obj.param; + std::ostringstream result; + + result << "netPrecision=" << netPrecision.name() << "_"; + result << "BS=" << CommonTestUtils::vec2str(batchSizes) << "_"; + result << std::string(runAsync ? 
"Async" : "Sync") << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); + } + + size_t hiddenSize; + + + void DynamicBatchTest::SetUp() { + InferenceEngine::Precision netPrecision; + std::map config; + std::tie(targetDevice, netPrecision, batch_sizes, run_async, config) = this->GetParam(); + configuration.insert(config.begin(), config.end()); + configuration[InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED] = InferenceEngine::PluginConfigParams::YES; + + max_batch_size = *std::max_element(batch_sizes.begin(), batch_sizes.end()); + + function = ngraph::builder::subgraph::makeSingleConv(); + } + + void DynamicBatchTest::LoadNetwork() { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + ConfigureNetwork(); + cnnNetwork.setBatchSize(max_batch_size); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } + + void DynamicBatchTest::Infer() { + inferRequest = executableNetwork.CreateInferRequest(); + inputs.clear(); + + for (int i = 0; i < batch_sizes.size(); i++) { + auto batch_size = batch_sizes[i]; + + cnnNetwork.setBatchSize(batch_size); + inputs.clear(); + for (const auto &input : cnnNetwork.getInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + inputs.push_back(blob); + } + reference_inputs.push_back(inputs); + reference_outputs.push_back(CalculateRefs()); + } + + for (int i = 0; i < batch_sizes.size(); i++) { + infer_requests.push_back(executableNetwork.CreateInferRequest()); + auto batch_size = batch_sizes[i]; + + auto& infer_request = infer_requests[i]; + infer_request.SetBatch(batch_size); + + inputs.clear(); + for (const auto &input : executableNetwork.GetInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + infer_request.SetBlob(info->name(), blob); + inputs.push_back(blob); + } + + scaled_inputs.push_back(inputs); + + for (int j = 0; j < reference_inputs[i].size(); j++) { + auto& ref = reference_inputs[i][j]; + auto& actual = scaled_inputs[i][j]; + + auto byte_num = ref->byteSize(); + auto ref_ptr = ref->buffer().as(); + auto actual_ptr = actual->buffer().as(); + + for (int k = 0; k < byte_num; k++) { + actual_ptr[k] = ref_ptr[k]; + } + } + } + + for (auto& infer_request : infer_requests) { + if (run_async) { + infer_request.StartAsync(); + } else { + infer_request.Infer(); + } + } + + if (run_async) { + for (auto& infer_request : infer_requests) { + auto status = infer_request.Wait(10000); + if (status != InferenceEngine::StatusCode::OK) { + GTEST_FAIL() << "Inference request status after wait is not OK"; + } + } + } + } + + void DynamicBatchTest::Validate() { + for (int i = 0; i < infer_requests.size(); i++) { + auto outputs = std::vector{}; + for (const auto &output : executableNetwork.GetOutputsInfo()) { + const auto &name = output.first; + outputs.push_back(infer_requests[i].GetBlob(name)); + } + for (int j = 0; j < reference_outputs[i].size(); j++) { + if (reference_outputs[i][j].size() < outputs[j]->byteSize()) { + auto actual_ptr = outputs[j]->buffer().as(); + for (int k = reference_outputs[i][j].size(); k < outputs[j]->byteSize(); k++) actual_ptr[k] = 0; + reference_outputs[i][j].resize(outputs[j]->byteSize()); + } + } + Compare(reference_outputs[i], outputs); + } + } + + void DynamicBatchTest::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + LoadNetwork(); + Infer(); + Validate(); + } + + TEST_P(DynamicBatchTest, CompareWithRefs) { + Run(); + }; +} // namespace ConfigurationTestsDefinitions diff --git 
a/inference-engine/tests/functional/plugin/shared/src/execution_graph_tests/remove_parameter.cpp b/inference-engine/tests/functional/plugin/shared/src/execution_graph_tests/remove_parameter.cpp new file mode 100644 index 00000000000000..3f23df27a1833e --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/execution_graph_tests/remove_parameter.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/remove_parameter.hpp" +#include "functional_test_utils/skip_tests_config.hpp" + +#include +#include + +namespace ExecutionGraphTests { + +std::string ExecGraphRemoveParameterNode::getTestCaseName( + testing::TestParamInfo obj) { + std::string targetDevice = obj.param; + return "Dev=" + targetDevice; +} + +/** + * Replacing parameter by another node change indexing for other parameters, + * check that we still can correctly process changed network. + */ +TEST_P(ExecGraphRemoveParameterNode, RemoveParameterNode) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto device_name = this->GetParam(); + ngraph::Shape shape = {3, 2}; + float in_data_2[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + float in_data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + ngraph::element::Type type = ngraph::element::f32; + + using std::make_shared; + using namespace ngraph::op; + + // Some simple graph with 2 Parameters + // in2 in1 // + // \ / | // + // mul | // + // \ | // + // sum // + // | // + // out // + auto input = make_shared(type, shape); + auto input2 = make_shared(type, shape); + auto mul = make_shared(input2, input); + auto sum = make_shared(mul, input); + + auto function = std::make_shared( + ngraph::NodeVector{sum}, ngraph::ParameterVector{input2, input}, + "SimpleNet"); + + // Load into plugin and get exec graph + auto ie = InferenceEngine::Core(); + auto net = InferenceEngine::CNNNetwork(function); + auto exec_net = ie.LoadNetwork(net, device_name); + auto exec_graph = exec_net.GetExecGraphInfo(); + auto infer_req = exec_net.CreateInferRequest(); + InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::FP32, shape, + InferenceEngine::Layout::NC); + InferenceEngine::Blob::Ptr inBlob2 = + InferenceEngine::make_shared_blob(tDesc, in_data_2); + infer_req.SetBlob(input2->get_name(), inBlob2); + + InferenceEngine::Blob::Ptr inBlob = + InferenceEngine::make_shared_blob(tDesc, in_data); + infer_req.SetBlob(input->get_name(), inBlob); + + infer_req.Infer(); + + auto outBlob = infer_req.GetBlob(sum->get_name()); + InferenceEngine::MemoryBlob::CPtr output = + InferenceEngine::as(outBlob); + auto outputHolder = output->rmap(); + const auto ref_result = outputHolder.as(); + + ASSERT_EQ(function->get_parameter_index(input2), 0); + ASSERT_EQ(function->get_parameter_index(input), 1); + + // Replace input2 by constant + auto const_in = + make_shared(type, shape, std::vector(6, 1.0)); + mul->input(0).replace_source_output(const_in->output(0)); + function->remove_parameter(input2); + + ASSERT_EQ(function->get_parameters().size(), 1); + ASSERT_EQ(function->get_parameter_index(input), 0); + + // Load new function into plugin and get exec graph + auto new_net = InferenceEngine::CNNNetwork(function); + auto new_exec_net = ie.LoadNetwork(new_net, device_name); + auto new_exec_graph = new_exec_net.GetExecGraphInfo(); + + // infer new graph + auto new_infer_req = new_exec_net.CreateInferRequest(); + new_infer_req.SetBlob(input->get_name(), inBlob); + + new_infer_req.Infer(); + + auto new_outBlob = new_infer_req.GetBlob(sum->get_name()); + 
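+ // Map the reference output of the original two-parameter graph and confirm the starting
+ // parameter order (input2 at index 0, input at index 1). The test then rewires mul's first
+ // input from input2 to a Constant via replace_source_output(), calls remove_parameter(input2),
+ // and checks that the remaining parameter shifts to index 0 while inference on the reloaded
+ // network still matches the reference result.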
InferenceEngine::MemoryBlob::CPtr new_output = + InferenceEngine::as(new_outBlob); + auto new_outputHolder = new_output->rmap(); + const auto result = new_outputHolder.as(); + + for (int i = 0; i < 6; i++) { + ASSERT_NEAR(result[i], ref_result[i], 1e-5); + } +} + +} // namespace ExecutionGraphTests diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_qdq_transformation.cpp new file mode 100644 index 00000000000000..a4780649188737 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_qdq_transformation.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/convolution_qdq_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/pass/convert_prc.hpp" +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" + +namespace LayerTestsDefinitions { + +std::string ConvolutionQDqTransformation::getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type netPrecision; + ngraph::Shape inputShape; + std::string targetDevice; + ngraph::pass::low_precision::LayerTransformation::Params params; + ConvolutionQDqTransformationParam param; + std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param; + + std::ostringstream result; + result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << param; + return result.str(); +} + +void ConvolutionQDqTransformation::SetUp() { + // threshold = 0.1f; + + ngraph::element::Type netPrecision; + ngraph::Shape inputShape; + ngraph::pass::low_precision::LayerTransformation::Params params; + ConvolutionQDqTransformationParam param; + std::tie(netPrecision, inputShape, targetDevice, params, param) = this->GetParam(); + + function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + netPrecision, + inputShape, + param.fakeQuantizeOnData, + param.convertOnData, + param.dequantizationOnData, + param.constantOnWeights, + param.fakeQuantizeOnWeights, + param.convertOnWeights, + param.dequantizationOnWeights, + {}); +} + +void ConvolutionQDqTransformation::Run() { + LayerTestsCommon::Run(); + + const auto params = std::get<4>(GetParam()); + const auto actualType = getRuntimePrecision(params.layerName); + EXPECT_EQ(actualType, params.expectedKernelType); +} + +TEST_P(ConvolutionQDqTransformation, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp index f9d8df8d7c51dd..6d746e9846153f 100755 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // 
@@ -29,8 +29,8 @@ std::string ConvolutionTransformation::getTestCaseName(testing::TestParamInfoGetParam(); - function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::getOriginal( + function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( netPrecision, inputShape, // TODO: pass from test parameters @@ -78,7 +78,12 @@ void ConvolutionTransformation::validate() { ASSERT_FALSE(parent == nullptr); const std::string typeName = parent->get_type_name(); - if (param.fakeQuantizeOnData.empty() || param.fakeQuantizeOnWeights.empty()) { + const auto isQuantizationSupported = [](const ngraph::builder::subgraph::FakeQuantizeOnData& fq) { + return (fq.quantizationLevel == 255) || (fq.quantizationLevel == 256); + }; + + if (param.fakeQuantizeOnData.empty() || (!isQuantizationSupported(param.fakeQuantizeOnData)) || + param.fakeQuantizeOnWeights.empty() || (!isQuantizationSupported(param.fakeQuantizeOnWeights))) { ASSERT_EQ("ConvolutionIE", typeName); } else { ASSERT_EQ("ScaleShiftIE", typeName); diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp new file mode 100644 index 00000000000000..3e005334d709b7 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/fake_quantize_with_dq_not_optimal_transformation.hpp" + +#include +#include +#include +#include +#include + +#include +#include "lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp" + +namespace LayerTestsDefinitions { + +std::string FakeQuantizeWithNotOptimalTransformation::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + ngraph::pass::low_precision::LayerTransformation::Params params; + FakeQuantizeWithNotOptimalTransformationTestValues testValues; + std::tie(netPrecision, inputShapes, targetDevice, params, testValues) = obj.param; + + std::ostringstream result; + result << getTestCaseNameByParams(netPrecision, inputShapes, targetDevice, params) << "_" << testValues; + return result.str(); +} + +void FakeQuantizeWithNotOptimalTransformation::SetUp() { + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + ngraph::pass::low_precision::LayerTransformation::Params params; + FakeQuantizeWithNotOptimalTransformationTestValues testValues; + std::tie(netPrecision, inputShape, targetDevice, params, testValues) = this->GetParam(); + + function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get( + FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision), + inputShape, + testValues.fqOnData, + testValues.convertOnData, + testValues.dequantizationOnData, + testValues.constantOnWeights, + testValues.fqOnWeights, + testValues.convertOnWeights, + testValues.dequantizationOnWeights, + testValues.dequantizationAfter); +} + +void FakeQuantizeWithNotOptimalTransformation::Run() { + LayerTestsCommon::Run(); + + const auto params = std::get<4>(GetParam()); + const auto actualType = getRuntimePrecision("output_original"); + EXPECT_EQ(actualType, params.expectedPrecision); +} + 
+TEST_P(FakeQuantizeWithNotOptimalTransformation, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fuse_convert_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fuse_convert_transformation.cpp index 5d8228f975cc10..72b58ce078832f 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fuse_convert_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fuse_convert_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -19,8 +19,6 @@ #include "ngraph_functions/pass/convert_prc.hpp" #include "lpt_ngraph_functions/fuse_convert_function.hpp" -#include - namespace LayerTestsDefinitions { std::string FuseConvertTransformation::getTestCaseName(testing::TestParamInfo obj) { diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp index 86e72caffaaca9..4844b02359ccb5 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp @@ -82,7 +82,7 @@ void MatMulTransformation::validate() { MatMulTransformationTestValues testValues; std::tie(precision, inputShape, targetDevice, testValues) = this->GetParam(); - const auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8(); + const auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(); const auto transformed = transformNGraph(params, getLowPrecisionTransformationsNGraph(params)); const auto output = transformed->get_output_op(0); @@ -91,6 +91,15 @@ void MatMulTransformation::validate() { ASSERT_EQ("ScaleShiftIE", typeName); } +void MatMulTransformation::Run() { + LayerTestsCommon::Run(); + + const auto params = std::get<3>(GetParam()); + const auto actualType = getRuntimePrecision(params.expectedKernelName); + + EXPECT_EQ(actualType, params.expectedRuntimePrecision); +} + TEST_P(MatMulTransformation, CompareWithRefImpl) { Run(); }; diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_constant_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_constant_transformation.cpp index a519b6e32e47b4..b82968d20092dc 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_constant_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_constant_transformation.cpp @@ -27,6 +27,7 @@ std::string MatMulWithConstantTransformation::getTestCaseName(testing::TestParam std::ostringstream result; result << + testValues.inputShape << "_" << precision << "_" << targetDevice << "_" << testValues.fqOnData << "_" << @@ -78,13 +79,13 @@ void MatMulWithConstantTransformation::validate() { MatMulWithConstantTransformationTestValues testValues; std::tie(precision, targetDevice, testValues) = this->GetParam(); - const auto params = 
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8(); + const auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(); const auto transformed = transformNGraph(params, getLowPrecisionTransformationsNGraph(params)); const auto output = transformed->get_output_op(0); const auto scaleShift = output->get_input_node_shared_ptr(0); const std::string typeName = scaleShift->get_type_name(); - ASSERT_EQ("ScaleShiftIE", typeName); + ASSERT_TRUE("ScaleShiftIE" == typeName || "Eltwise" == typeName); } void MatMulWithConstantTransformation::Run() { diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp index 4c306189ef18d4..eb31edb8d8df17 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -19,8 +19,6 @@ #include "ngraph_functions/pass/convert_prc.hpp" #include "lpt_ngraph_functions/multiply_to_group_convolution_function.hpp" -#include - namespace LayerTestsDefinitions { std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(testing::TestParamInfo obj) { diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mvn_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mvn_transformation.cpp index c1a21b5db3225f..0e2295314680c2 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mvn_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mvn_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -19,8 +19,6 @@ #include "ngraph_functions/pass/convert_prc.hpp" #include "lpt_ngraph_functions/mvn_function.hpp" -#include - namespace LayerTestsDefinitions { std::string MVNTransformation::getTestCaseName(testing::TestParamInfo obj) { diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/squeeze_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/squeeze_transformation.cpp index 80fd06ae846993..636326372a194a 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/squeeze_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/squeeze_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -15,8 +15,6 @@ #include "ngraph_functions/subgraph_builders.hpp" #include "lpt_ngraph_functions/squeeze_function.hpp" -#include - namespace LayerTestsDefinitions { inline std::ostream& operator<<(std::ostream& os, const std::vector& values) { diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp 
b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp index 4f352d60cc5eef..05c07533da738f 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -17,6 +17,7 @@ namespace LayerTestsUtils { class LayerTransformationParamsNGraphFactory { public: + static ngraph::pass::low_precision::LayerTransformation::Params createParamsU8I8AndI8(); static ngraph::pass::low_precision::LayerTransformation::Params createParamsU8I8(); static ngraph::pass::low_precision::LayerTransformation::Params createParamsI8I8(); static ngraph::pass::low_precision::LayerTransformation::Params createParams(); diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder.hpp index 5824f6b69d0a7a..728b3b8b2c4f76 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder.hpp @@ -1,32 +1,15 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include - -#include "ie_core.hpp" -#include "ie_precision.hpp" -#include "details/ie_exception.hpp" - -#include "ngraph/opsets/opset1.hpp" - -#include "functional_test_utils/blob_utils.hpp" #include "shared_test_classes/base/layer_test_utils.hpp" -#include "common_test_utils/common_utils.hpp" - -#include "ngraph_functions/utils/ngraph_helpers.hpp" -#include "ngraph_functions/builders.hpp" - namespace LayerTestsDefinitions { typedef std::tuple< @@ -46,10 +29,6 @@ class CTCGreedyDecoderLayerTest static std::string getTestCaseName(const testing::TestParamInfo& obj); protected: - InferenceEngine::SizeVector inputShapes; - InferenceEngine::SizeVector sequenceLengths; - bool mergeRepeated; - void SetUp() override; }; diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder_seq_len.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder_seq_len.hpp new file mode 100644 index 00000000000000..bfc8e0cde64cec --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/ctc_greedy_decoder_seq_len.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" + +namespace LayerTestsDefinitions { +typedef std::tuple< + InferenceEngine::SizeVector, // Input shape + InferenceEngine::Precision, // Probabilities precision + InferenceEngine::Precision, // Indices precision + int, // Blank index + bool, // Merge repeated + 
std::string // Device name + > ctcGreedyDecoderSeqLenParams; + +class CTCGreedyDecoderSeqLenLayerTest + : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/mvn.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/mvn.hpp index ad8372225593f9..771eba2fa5a0aa 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/mvn.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/mvn.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,7 +8,6 @@ #include #include "shared_test_classes/base/layer_test_utils.hpp" -#include "ngraph_functions/builders.hpp" namespace LayerTestsDefinitions { @@ -22,10 +21,29 @@ typedef std::tuple< class MvnLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); protected: void SetUp() override; }; -} // namespace LayerTestsDefinitions \ No newline at end of file +typedef std::tuple< + InferenceEngine::SizeVector, // Input shapes + InferenceEngine::Precision, // Data precision + InferenceEngine::Precision, // Axes precision + std::vector, // Axes + bool, // Normalize variance + float, // Epsilon + std::string, // Epsilon mode + std::string // Device name + > mvn6Params; + +class Mvn6LayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/tensor_names.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/tensor_names.hpp new file mode 100644 index 00000000000000..dfa2cbeaa259d7 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/tensor_names.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +namespace SubgraphTestsDefinitions { + +typedef std::tuple< + std::string // Device name +> constResultParams; + +class TensorNamesTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +protected: + void SetUp() override; +}; +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index 5b6d6e8faa3253..402e5ffd319af0 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ 
b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2021 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include @@ -211,7 +211,9 @@ void LayerTestsCommon::Serialize() { bool success; std::string message; std::tie(success, message) = - compare_functions(result.getFunction(), function); + compare_functions(result.getFunction(), function, false, false, false, + true, // precision + true); // attributes EXPECT_TRUE(success) << message; diff --git a/inference-engine/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp index 932c7085913b44..436b5c079a888f 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,7 @@ #include #include +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "functional_test_utils/blob_utils.hpp" #include "ngraph_functions/pass/convert_prc.hpp" @@ -19,6 +20,16 @@ using namespace ngraph; namespace LayerTestsUtils { +ngraph::pass::low_precision::LayerTransformation::Params LayerTransformationParamsNGraphFactory::createParamsU8I8AndI8() { + return ngraph::pass::low_precision::LayerTransformation::Params( + true, + ngraph::pass::low_precision::LayerTransformation::QuantizedTensorAlignment::None, + ngraph::pass::low_precision::LayerTransformation::QuantizedTensorAlignment::None, + true, + { ngraph::element::u8, ngraph::element::i8 }, + { ngraph::element::i8 }); +} + ngraph::pass::low_precision::LayerTransformation::Params LayerTransformationParamsNGraphFactory::createParamsU8I8() { return ngraph::pass::low_precision::LayerTransformation::Params( true, @@ -41,6 +52,8 @@ ngraph::pass::low_precision::LayerTransformation::Params LayerTransformationPara LayerTransformation::LayerTransformation() { threshold = 0.05; + auto& configuration = GetConfiguration(); + configuration[PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE] = PluginConfigParams::YES; } InferenceEngine::Blob::Ptr LayerTransformation::GenerateInput( diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp index 7f073ea11ea566..1de0313680c1bf 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp @@ -1,13 +1,13 @@ -// Copyright (C) 2020 Intel Corporation -// +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "shared_test_classes/single_layer/ctc_greedy_decoder.hpp" +#include "ngraph_functions/builders.hpp" namespace LayerTestsDefinitions { std::string CTCGreedyDecoderLayerTest::getTestCaseName( - const testing::TestParamInfo& obj) { + const testing::TestParamInfo& obj) { InferenceEngine::Precision netPrecision; InferenceEngine::Precision inPrc, outPrc; InferenceEngine::Layout inLayout, outLayout; @@ -36,19 
+36,22 @@ std::string CTCGreedyDecoderLayerTest::getTestCaseName( } void CTCGreedyDecoderLayerTest::SetUp() { - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + InferenceEngine::Layout inLayout, outLayout; + InferenceEngine::SizeVector inputShapes; + bool mergeRepeated; std::tie(netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, mergeRepeated, targetDevice) = GetParam(); - sequenceLengths = { inputShapes.at(0), inputShapes.at(1) }; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto paramsIn = ngraph::builder::makeParams(ngPrc, { inputShapes, sequenceLengths }); - auto paramsOut = ngraph::helpers::convert2OutputVector( + auto paramsIn = ngraph::builder::makeParams(ngPrc, { inputShapes }); + auto paramOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(paramsIn)); - auto ctcGreedyDecoder = std::make_shared( - paramsOut[0], - paramsOut[1], - mergeRepeated); + + auto ctcGreedyDecoder = std::dynamic_pointer_cast( + ngraph::builder::makeCTCGreedyDecoder(paramOuts[0], mergeRepeated)); ngraph::ResultVector results{ std::make_shared(ctcGreedyDecoder) }; - function = std::make_shared(results, paramsIn, "Grn"); + function = std::make_shared(results, paramsIn, "CTCGreedyDecoder"); } } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp new file mode 100644 index 00000000000000..0d480f1642af8b --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "shared_test_classes/single_layer/ctc_greedy_decoder_seq_len.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { +std::string CTCGreedyDecoderSeqLenLayerTest::getTestCaseName( + const testing::TestParamInfo& obj) { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision dataPrecision, indicesPrecision; + int blankIndex; + bool mergeRepeated; + std::string targetDevice; + std::tie(inputShapes, + dataPrecision, + indicesPrecision, + blankIndex, + mergeRepeated, + targetDevice) = obj.param; + + std::ostringstream result; + + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << '_'; + result << "dataPRC=" << dataPrecision.name() << '_'; + result << "idxPRC=" << indicesPrecision.name() << '_'; + result << "BlankIdx=" << blankIndex << '_'; + result << "mergeRepeated=" << std::boolalpha << mergeRepeated << '_'; + result << "trgDev=" << targetDevice; + + return result.str(); +} + +void CTCGreedyDecoderSeqLenLayerTest::SetUp() { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision dataPrecision, indicesPrecision; + int blankIndex; + bool mergeRepeated; + std::tie(inputShapes, + dataPrecision, + indicesPrecision, + blankIndex, + mergeRepeated, + targetDevice) = GetParam(); + + auto ngDataPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrecision); + auto ngIdxPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(indicesPrecision); + auto paramsIn = ngraph::builder::makeParams(ngDataPrc, { inputShapes }); + auto paramOuts = ngraph::helpers::convert2OutputVector( + 
ngraph::helpers::castOps2Nodes(paramsIn)); + + auto ctcGreedyDecoderSeqLen = std::dynamic_pointer_cast( + ngraph::builder::makeCTCGreedyDecoderSeqLen(paramOuts[0], blankIndex, mergeRepeated, ngIdxPrc)); + + ngraph::ResultVector results; + for (int i = 0; i < ctcGreedyDecoderSeqLen->get_output_size(); i++) { + results.push_back(std::make_shared(ctcGreedyDecoderSeqLen->output(i))); + } + function = std::make_shared(results, paramsIn, "CTCGreedyDecoderSeqLen"); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp index 4e1047f0232f4b..ab9f2c08617ef7 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp @@ -53,10 +53,10 @@ void CTCLossLayerTest::SetUp() { auto params = ngraph::builder::makeParams(ngFpPrc, {logitsShapes}); auto paramOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(params)); - auto conv = std::dynamic_pointer_cast( + auto ctcLoss = std::dynamic_pointer_cast( ngraph::builder::makeCTCLoss(paramOuts[0], logitsLength, labels, labelsLength, blankIndex, ngFpPrc, ngIntPrc, preprocessCollapseRepeated, ctcMergeRepeated, unique)); - ngraph::ResultVector results{std::make_shared(conv)}; + ngraph::ResultVector results{std::make_shared(ctcLoss)}; function = std::make_shared(results, params, "CTCLoss"); } } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/mvn.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/mvn.cpp index ea42aec04ef0d3..0b27aaed76b756 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/mvn.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/mvn.cpp @@ -1,12 +1,13 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "shared_test_classes/single_layer/mvn.hpp" +#include "ngraph_functions/builders.hpp" namespace LayerTestsDefinitions { -std::string MvnLayerTest::getTestCaseName(testing::TestParamInfo obj) { +std::string MvnLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { InferenceEngine::SizeVector inputShapes; InferenceEngine::Precision inputPrecision; bool acrossChannels, normalizeVariance; @@ -36,4 +37,47 @@ void MvnLayerTest::SetUp() { ngraph::ResultVector results{std::make_shared(mvn)}; function = std::make_shared(results, param, "mvn"); } -} // namespace LayerTestsDefinitions \ No newline at end of file + + +std::string Mvn6LayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision dataPrecision, axesPrecision; + std::vector axes; + bool normalizeVariance; + float eps; + std::string epsMode; + std::string targetDevice; + std::tie(inputShapes, dataPrecision, axesPrecision, axes, normalizeVariance, eps, epsMode, targetDevice) = obj.param; + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "DataPrc=" << dataPrecision.name() << "_"; + result << "AxPrc=" << axesPrecision.name() << "_"; + result << "Ax=" << CommonTestUtils::vec2str(axes) << "_"; + result << "NormVariance=" << (normalizeVariance ? 
"TRUE" : "FALSE") << "_"; + result << "Eps=" << eps << "_"; + result << "EM=" << epsMode << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void Mvn6LayerTest::SetUp() { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision dataPrecision, axesPrecision; + std::vector axes; + bool normalizeVariance; + float eps; + std::string epsMode; + std::tie(inputShapes, dataPrecision, axesPrecision, axes, normalizeVariance, eps, epsMode, targetDevice) = this->GetParam(); + + auto dataType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrecision); + auto axesType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(axesPrecision); + + auto param = ngraph::builder::makeParams(dataType, {inputShapes}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); + auto axesNode = ngraph::builder::makeConstant(axesType, ngraph::Shape{axes.size()}, axes); + auto mvn = ngraph::builder::makeMVN6(paramOuts[0], axesNode, normalizeVariance, eps, epsMode); + ngraph::ResultVector results{std::make_shared(mvn)}; + function = std::make_shared(results, param, "MVN6"); +} + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/tensor_names.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/tensor_names.cpp new file mode 100644 index 00000000000000..13f155a6d5c891 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/tensor_names.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/tensor_names.hpp" + +namespace SubgraphTestsDefinitions { + +std::string TensorNamesTest::getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice; + std::tie(targetDevice) = obj.param; + std::ostringstream result; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void TensorNamesTest::SetUp() { + std::tie(targetDevice) = this->GetParam(); + + auto parameter = std::make_shared(ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 10, 10}); + parameter->set_friendly_name("parameter"); + parameter->get_output_tensor(0).set_names({"input"}); + auto relu_prev = std::make_shared(parameter); + relu_prev->set_friendly_name("relu_prev"); + relu_prev->get_output_tensor(0).set_names({"relu,prev_t", "identity_prev_t"}); + auto relu = std::make_shared(relu_prev); + relu->set_friendly_name("relu"); + relu->get_output_tensor(0).set_names({"relu,t", "identity"}); + const ngraph::ResultVector results{std::make_shared(relu)}; + results[0]->set_friendly_name("out"); + ngraph::ParameterVector params{parameter}; + function = std::make_shared(results, params, "TensorNames"); +} + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/CMakeLists.txt b/inference-engine/tests/ie_test_utils/common_test_utils/CMakeLists.txt index 4b19b4190ed256..6145bc606b47a4 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/CMakeLists.txt +++ b/inference-engine/tests/ie_test_utils/common_test_utils/CMakeLists.txt @@ -30,11 +30,11 @@ function(add_gtest_libraries) if(TARGET "${target_name}") get_target_property(_target_cxx_flags ${target_name} COMPILE_FLAGS) if(_target_cxx_flags) - if(CMAKE_CXX_FLAGS_DEBUG MATCHES ".+/Z7.+") - string(REPLACE "-Zi" " " _target_cxx_flags ${_target_cxx_flags}) - message(STATUS "Removing -Zi flag from target " ${target_name}) - 
set_target_properties(${target_name} PROPERTIES COMPILE_FLAGS "${_target_cxx_flags}") - endif() + if(CMAKE_CXX_FLAGS_DEBUG MATCHES ".+/Z7.+") + string(REPLACE "-Zi" " " _target_cxx_flags ${_target_cxx_flags}) + message(STATUS "Removing -Zi flag from target " ${target_name}) + set_target_properties(${target_name} PROPERTIES COMPILE_FLAGS "${_target_cxx_flags}") + endif() endif() endif() endforeach() diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp index fa05b365e7add5..a3780a67e87267 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp @@ -5,14 +5,21 @@ #include "ngraph_test_utils.hpp" #include +#include +#include #include #include +#include #include +#include +#include +#include #include #include #include #include +#include #include namespace { @@ -70,7 +77,7 @@ std::string to_str(const T& v) { return std::to_string(v); } -std::pair error(std::string s) { +FunctionsComparator::Result error(std::string s) { return {false, std::move(s)}; } @@ -83,22 +90,583 @@ std::string name(const Node& n) { return n->get_friendly_name(); } +namespace attr_comparison { + +using AttrName = std::string; + +class Result { +public: + explicit Result(std::string m = {}) : m_message(std::move(m)) {} + + const std::string& message() const { + return m_message; + } + + bool has_error() const { + return !m_message.empty(); + } + + Result& operator+=(const std::string& msg) { + m_message.append(m_break_line_no, '\n').append(msg); + m_break_line_no = 1; + return *this; + } + +private: + std::string m_message; + int m_break_line_no{0}; +}; + +using SubGraphOpInputDescription = + std::vector>; + +using SubGraphOpOutputDescription = + std::vector>; + +using SpecialBodyPorts = ngraph::opset6::Loop::SpecialBodyPorts; + +namespace storage { + +class MemoryChunk { +public: + using Data = std::vector; + MemoryChunk(Data data) : m_data{std::move(data)} {} + + Data::const_pointer data() const { + return m_data.data(); + } + + size_t size() const { + return m_data.size(); + } + +private: + Data m_data; +}; + +template +class AttributeStorage { +public: + bool insert_value(AttrName name, AttrValue value) { + return m_attributes.insert({std::move(name), std::move(value)}).second; + } + + const AttrValue* get_value(const AttrName& name) const { + const auto found = m_attributes.find(name); + if (found != end(m_attributes)) { + return std::addressof(found->second); + } + return {}; + } + + std::size_t get_attributes_number() const { + return m_attributes.size(); + } + +private: + std::map m_attributes; +}; + +class Storage : private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage>, + private AttributeStorage, + private AttributeStorage, + private AttributeStorage { +public: + template 
+ const AttributeStorage& storage() const { + return *static_cast*>(this); + } + template + AttributeStorage& storage() { + return *static_cast*>(this); + } + + size_t stored_attributes_number() const { + return storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage>().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number() + + storage().get_attributes_number(); + } +}; + +} // namespace storage + +class ReadAndStoreAttributes : public ngraph::AttributeVisitor, protected storage::Storage { +public: + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { + if (auto inputs = + ngraph::as_type>(&adapter)) { + insert(name, inputs->get()); + } else if ( + auto outputs = + ngraph::as_type>(&adapter)) { + insert(name, outputs->get()); + } else if ( + auto ports = ngraph::as_type>(&adapter)) { + insert(name, ports->get()); + } else { + m_read_result += "store attr [ ERR ]: " + name + + " [drop `void` comparison which is '" + adapter.get_type_info().name + + "']"; + } + } + + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { + const auto beg = static_cast(adapter.get_ptr()); + const auto end = beg + adapter.size(); + insert(name, storage::MemoryChunk{storage::MemoryChunk::Data(beg, end)}); + } + +#define ON_ADAPTER(TYPE) \ + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { \ + insert(name, adapter.get()); \ + } + + ON_ADAPTER(bool) + ON_ADAPTER(std::string) + ON_ADAPTER(int8_t) + ON_ADAPTER(int16_t) + ON_ADAPTER(int32_t) + ON_ADAPTER(int64_t) + ON_ADAPTER(uint8_t) + ON_ADAPTER(uint16_t) + ON_ADAPTER(uint32_t) + ON_ADAPTER(uint64_t) + ON_ADAPTER(float) + ON_ADAPTER(double) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + +#undef ON_ADAPTER + + void on_adapter( + const std::string&, ngraph::ValueAccessor>&) override { + // handled by `compare_functions` drop it here + } + + template + const AttrValue* get(const AttrName& name) const { + return storage().get_value(name); + } + + template + bool insert(AttrName name, AttrValue value) { + return storage().insert_value(std::move(name), std::move(value)); + } + + size_t attributes_number() const { + return stored_attributes_number(); + } + + const Result read_result() const { + return m_read_result; + } + +private: + Result m_read_result; +}; + +namespace equal { + +template +struct Equal { + static bool equal_value(const Value& lhs, const Value& rhs) { + 
return lhs == rhs; + } +}; + +template <> +struct Equal { + static bool equal_value(float lhs, float rhs) { + return std::abs(lhs - rhs) < 1e-5; + } +}; + +template <> +struct Equal { + static bool equal_value(double lhs, double rhs) { + return std::abs(lhs - rhs) < 1e-5; + } +}; + +template <> +struct Equal> { + static bool equal_value(const std::vector& lhs, const std::vector& rhs) { + return lhs.size() == rhs.size() && + std::equal(begin(lhs), end(lhs), begin(rhs), Equal::equal_value); + } +}; + +template <> +struct Equal> { + static bool equal_value(const std::vector& lhs, const std::vector& rhs) { + return lhs.size() == rhs.size() && + std::equal(begin(lhs), end(lhs), begin(rhs), Equal::equal_value); + } +}; + +template <> +struct Equal { + static bool equal_value( + SubGraphOpInputDescription::const_reference lhs, + SubGraphOpInputDescription::const_reference rhs) { + const auto& lhs_type_info = lhs->get_type_info(); + const auto& rhs_type_info = rhs->get_type_info(); + if (lhs_type_info != rhs_type_info) { + return false; + } + using SubGraphOp = ngraph::op::util::SubGraphOp; + if (lhs_type_info == SubGraphOp::SliceInputDescription::type_info) { + const auto& l_input = static_cast(*lhs); + const auto& r_input = static_cast(*rhs); + return l_input.m_start == r_input.m_start && l_input.m_stride == r_input.m_stride && + l_input.m_part_size == r_input.m_part_size && l_input.m_end == r_input.m_end && + l_input.m_axis == r_input.m_axis; + } else if (lhs_type_info == SubGraphOp::MergedInputDescription::type_info) { + return true; + } else if (lhs_type_info == SubGraphOp::InvariantInputDescription::type_info) { + return true; + } + return false; + } +}; + +template <> +struct Equal { + static bool equal_value( + const SubGraphOpInputDescription& lhs, const SubGraphOpInputDescription& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + return std::is_permutation( + begin(lhs), end(lhs), begin(rhs), + Equal::equal_value); + } +}; + +template <> +struct Equal { + static bool equal_value( + SubGraphOpOutputDescription::const_reference lhs, + SubGraphOpOutputDescription::const_reference rhs) { + const auto& lhs_type_info = lhs->get_type_info(); + const auto& rhs_type_info = rhs->get_type_info(); + if (lhs_type_info != rhs_type_info) { + return false; + } + using SubGraphOp = ngraph::op::util::SubGraphOp; + if (lhs_type_info == SubGraphOp::ConcatOutputDescription::type_info) { + const auto& l_output = static_cast(*lhs); + const auto& r_output = static_cast(*rhs); + return l_output.m_start == r_output.m_start && l_output.m_stride == r_output.m_stride && + l_output.m_part_size == r_output.m_part_size && + l_output.m_end == r_output.m_end && l_output.m_axis == r_output.m_axis; + } else if (lhs_type_info == SubGraphOp::BodyOutputDescription::type_info) { + const auto& l_output = static_cast(*lhs); + const auto& r_output = static_cast(*rhs); + return l_output.m_iteration == r_output.m_iteration; + } + return false; + } +}; + +template <> +struct Equal { + static bool equal_value( + const SubGraphOpOutputDescription& lhs, const SubGraphOpOutputDescription& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + return std::is_permutation( + begin(lhs), end(lhs), begin(rhs), + Equal::equal_value); + } +}; + +template <> +struct Equal { + static bool equal_value(const SpecialBodyPorts& lhs, const SpecialBodyPorts& rhs) { + return lhs.current_iteration_input_idx == rhs.current_iteration_input_idx; + } +}; + +} // namespace equal + +namespace str { +template +struct Void_t { + using type 
= void; +}; + +template +struct Get { + static std::string value(const T&) { + return std::string("[Ups can't convert this to value: ") + typeid(T).name() + "]"; + } +}; + +template +struct Get()))>::type> { + static std::string value(const T& v) { + return "[" + std::to_string(v) + "]"; + } +}; + +template <> +struct Get { + static std::string value(const std::string& v) { + return "[" + v + "]"; + } +}; + +template +struct Get< + T, + typename Void_t())), decltype(end(std::declval()))>::type> { + template + static std::string join(const Container& c, const char* glue = ", ") { + std::stringstream oss; + const char* s = ""; + for (const auto& v : c) { + oss << s << v; + s = glue; + } + return oss.str(); + } + + static std::string value(const T& v) { + return "[" + join(v) + "]"; + } +}; + +} // namespace str + +class ReadAndCompareAttributes : public ngraph::AttributeVisitor { +public: + ReadAndCompareAttributes(const ReadAndStoreAttributes& ref) + : m_attr_ref(ref), m_cmp_result{ref.read_result()} {} + + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { + if (should_return()) { + return; + } + m_visited_attributes.insert(name); + if (auto inputs = + ngraph::as_type>(&adapter)) { + verify(name, inputs->get()); + } else if ( + auto outputs = + ngraph::as_type>(&adapter)) { + verify(name, outputs->get()); + } else if ( + auto ports = ngraph::as_type>(&adapter)) { + verify(name, ports->get()); + } else { + m_cmp_result += "compare attr [ ERR ]: " + name + + " [drop `void` comparison which is '" + adapter.get_type_info().name + + "']"; + } + } + + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { + if (should_return()) { + return; + } + m_visited_attributes.insert(name); + const auto ref_value = m_attr_ref.get(name); + if (!ref_value) { + m_cmp_result += "missing attribute name: '" + name + "'"; + return; + } + + if (adapter.size() != ref_value->size() || + std::memcmp(ref_value->data(), adapter.get_ptr(), ref_value->size()) != 0) { + m_cmp_result += "mismatch in value: '" + name + "' : look in to the mem buffer"; + return; + } + } + +#define ON_ADAPTER(TYPE) \ + void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { \ + verify(name, adapter.get()); \ + } + + ON_ADAPTER(bool) + ON_ADAPTER(std::string) + ON_ADAPTER(int8_t) + ON_ADAPTER(int16_t) + ON_ADAPTER(int32_t) + ON_ADAPTER(int64_t) + ON_ADAPTER(uint8_t) + ON_ADAPTER(uint16_t) + ON_ADAPTER(uint32_t) + ON_ADAPTER(uint64_t) + ON_ADAPTER(float) + ON_ADAPTER(double) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + ON_ADAPTER(std::vector) + +#undef ON_ADAPTER + + void on_adapter( + const std::string&, ngraph::ValueAccessor>&) override { + // handled by `compare_functions` drop it here + } + + bool all_attr_was_compared() const { + return m_visited_attributes.size() == m_attr_ref.attributes_number(); + } + + size_t compared_attr_number() const { + return m_visited_attributes.size(); + } + + const Result& cmp_result() const { + return m_cmp_result; + } + +private: + bool should_return() const { + return m_fast_exit && m_cmp_result.has_error(); + } + template + void verify(const std::string& name, const AttrValue& attr_value) { + if (should_return()) { + return; + } + m_visited_attributes.insert(name); + const auto ref_value = 
m_attr_ref.get(name); + if (!ref_value) { + m_cmp_result += "missing attribute name: '" + name + "'"; + return; + } + + if (!equal::Equal::equal_value(*ref_value, attr_value)) { + m_cmp_result += "mismatch in value: '" + name + + "' : " + str::Get::value(*ref_value) + " vs " + + str::Get::value(attr_value); + } + } + + const ReadAndStoreAttributes& m_attr_ref; + Result m_cmp_result; + std::set m_visited_attributes; + bool m_fast_exit{true}; +}; + +} // namespace attr_comparison + +class CompareNodesAttributes { +public: + CompareNodesAttributes() : m_compare_attr(m_store_attr) {} + + attr_comparison::ReadAndStoreAttributes& get_ref_reder() { + return m_store_attr; + } + + attr_comparison::ReadAndCompareAttributes& get_cmp_reader() { + return m_compare_attr; + } + + bool equal() const { + return m_compare_attr.all_attr_was_compared() && !m_compare_attr.cmp_result().has_error(); + } + + friend std::string to_string(const CompareNodesAttributes& c) { + const auto& result = c.m_compare_attr.cmp_result(); + if (result.has_error()) { + return result.message(); + } + if (!c.m_compare_attr.all_attr_was_compared()) { + return "not all of attr was compared: " + + std::to_string(c.m_compare_attr.compared_attr_number()) + " vs " + + std::to_string(c.m_store_attr.attributes_number()); + } + return "looks good [compared " + std::to_string(c.m_compare_attr.compared_attr_number()) + + " attributes]"; + } + +private: + attr_comparison::ReadAndStoreAttributes m_store_attr; + attr_comparison::ReadAndCompareAttributes m_compare_attr; +}; + } // namespace -std::pair compare_functions( +FunctionsComparator::Result FunctionsComparator::compare( const std::shared_ptr& f1, - const std::shared_ptr& f2, - const bool compareConstValues, - const bool compareNames, - const bool compareRuntimeKeys, - const bool comparePrecisions) { + const std::shared_ptr& f2) const { /* * This function compares two nGraph functions and requires them to have exactly one output * + Check nodes types * + Check number of inputs * + Check shapes * + Check parent ports - * - Do not check nodes attributes (requires visitor mechanism to be completed) + * + Check node attributes by Visitor API */ auto f1_results = f1->get_results(); @@ -109,24 +677,26 @@ std::pair compare_functions( if (f1_results.size() != f2_results.size()) { return error( - "Number of results is different: " + to_str(f1_results.size()) + " and " + to_str(f2_results.size())); + "Number of results is different: " + to_str(f1_results.size()) + " and " + + to_str(f2_results.size())); } const auto& f1_sinks = f1->get_sinks(); const auto& f2_sinks = f2->get_sinks(); if (f1_sinks.size() != f2_sinks.size()) { return error( - "Number of sinks is different: " + to_str(f1_sinks.size()) + " and " + to_str(f2_sinks.size())); + "Number of sinks is different: " + to_str(f1_sinks.size()) + " and " + + to_str(f2_sinks.size())); } std::ostringstream err_log; using ComparedNodes = std::pair; std::queue q; - std::unordered_set used; + std::unordered_set used; for (size_t i = 0; i < f1_results.size(); ++i) { - if (compareNames) { + if (should_compare(NAMES)) { if (name(f1_results[i]->get_input_node_shared_ptr(0)) != name(f2_results[i]->get_input_node_shared_ptr(0))) { return error( @@ -134,7 +704,7 @@ std::pair compare_functions( " and " + name(f2_results[i]->get_input_node_shared_ptr(0))); } } - q.push({ f1_results[i].get(), f2_results[i].get() }); + q.push({f1_results[i].get(), f2_results[i].get()}); used.insert(f1_results[i].get()); } @@ -150,14 +720,13 @@ std::pair compare_functions( return 
error(typeInfoToStr(type_info1) + " != " + typeInfoToStr(type_info2)); } - auto subgraph1 = dynamic_cast(node1); - auto subgraph2 = dynamic_cast(node2); + auto subgraph1 = dynamic_cast(node1); + auto subgraph2 = dynamic_cast(node2); if (subgraph1 && subgraph2) { - auto res = compare_functions(subgraph1->get_function(), subgraph2->get_function(), - compareConstValues, compareNames, compareRuntimeKeys, comparePrecisions); - if (!res.first) { - return res; + auto result = compare(subgraph1->get_function(), subgraph2->get_function()); + if (!result.valid) { + return result; } } @@ -183,20 +752,20 @@ std::pair compare_functions( } for (int i = 0; i < node1->inputs().size(); ++i) { - if (compareConstValues) { + if (should_compare(CONST_VALUES)) { using Constant = ngraph::opset1::Constant; auto const1 = ngraph::as_type_ptr(node1->get_input_node_shared_ptr(i)); auto const2 = ngraph::as_type_ptr(node2->get_input_node_shared_ptr(i)); const auto equal = [](std::shared_ptr c1, std::shared_ptr c2) { - const auto &c1v = c1->cast_vector(); - const auto &c2v = c2->cast_vector(); - - return c1v.size() == c2v.size() && - std::equal(begin(c1v), end(c1v), begin(c2v), - [](const double &s1, const double & s2) { - return std::abs(s1 - s2) < 0.001; - }); + const auto& c1v = c1->cast_vector(); + const auto& c2v = c2->cast_vector(); + + return c1v.size() == c2v.size() && std::equal( + begin(c1v), end(c1v), begin(c2v), + [](const double& s1, const double& s2) { + return std::abs(s1 - s2) < 0.001; + }); }; if (const1 && const2 && !equal(const1, const2)) { @@ -206,7 +775,7 @@ std::pair compare_functions( } } - if (comparePrecisions) { + if (should_compare(PRECISIONS)) { if (node1->input(i).get_element_type() != node2->input(i).get_element_type()) { err_log << "Different element type detected\n" << name(node1) << " Input(" << i << ") " @@ -235,7 +804,7 @@ std::pair compare_functions( << idx2 << std::endl; } - if (compareRuntimeKeys && !compare_rt_keys(node1, node2)) { + if (should_compare(RUNTIME_KEYS) && !compare_rt_keys(node1, node2)) { err_log << "Different runtime info detected\n" << name(node1) << " and " << name(node2) << " not equal runtime info." 
<< std::endl; @@ -248,6 +817,27 @@ std::pair compare_functions( } for (int i = 0; i < node1->outputs().size(); ++i) { + const auto& tensor1 = node1->output(i).get_tensor(); + const auto& tensor2 = node2->output(i).get_tensor(); + + if (tensor1.get_names() != tensor2.get_names()) { + std::string names1 = ""; + for (const auto& name : tensor1.get_names()) { + if (!names1.empty()) + names1 += ", "; + names1 += name; + } + names1 = "\"" + names1 + "\""; + std::string names2 = ""; + for (const auto& name : tensor2.get_names()) { + if (!names2.empty()) + names2 += ", "; + names2 += name; + } + names2 = "\"" + names2 + "\""; + err_log << "Output tensors names " << names1 << " and " << names2 << " are different for nodes: " + << node1->get_friendly_name() << " and " << node2->get_friendly_name() << std::endl; + } if (!node1->output(i).get_partial_shape().same_scheme( node2->output(i).get_partial_shape())) { err_log << "Different shape detected\n" @@ -257,11 +847,20 @@ std::pair compare_functions( << std::endl; } } - } + if (should_compare(ATTRIBUTES)) { + CompareNodesAttributes compare_nodes; + node1->visit_attributes(compare_nodes.get_ref_reder()); + node2->visit_attributes(compare_nodes.get_cmp_reader()); + if (!compare_nodes.equal()) { + return error( + "Comparison of attributes failed for nodes " + name(node1) + ", " + + name(node2) + " [cmp status: " + to_string(compare_nodes) + "]"); + } + } + } return {err_log.str().empty(), err_log.str()}; } - void check_rt_info(const std::shared_ptr& f) { static const std::vector attrs_to_check{"Variant::RuntimeAttribute::FusedNames"}; diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp index 0ccfd3e35a34e4..aae8de6a757a34 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp @@ -18,42 +18,77 @@ using TransformationTests = CommonTestUtils::TestsCommon; -std::pair compare_functions( +class FunctionsComparator { +public: + enum CmpValues { + NONE = 0, + CONST_VALUES = 1 << 0, + NAMES = 1 << 1, + RUNTIME_KEYS = 1 << 2, + PRECISIONS = 1 << 3, + ATTRIBUTES = 1 << 4, + }; + + struct Result { + bool valid; + std::string message; + }; + + static constexpr FunctionsComparator no_default() noexcept { + return FunctionsComparator{NONE}; + } + static constexpr FunctionsComparator with_default() noexcept { + return FunctionsComparator{PRECISIONS}; + } + FunctionsComparator& enable(CmpValues f) noexcept { + m_comparition_flags = static_cast(m_comparition_flags | f); + return *this; + } + constexpr bool should_compare(CmpValues f) const noexcept { + return m_comparition_flags & f; + } + Result compare( + const std::shared_ptr& f1, + const std::shared_ptr& f2) const; + + Result operator()( + const std::shared_ptr& f1, + const std::shared_ptr& f2) const { + return compare(f1, f2); + } + +private: + constexpr explicit FunctionsComparator(CmpValues f) noexcept : m_comparition_flags(f) {} + CmpValues m_comparition_flags; +}; + +/// +/// \deprecated +/// \brief compare_functions is obsolete function use FunctionComparator instead. 
+/// +inline std::pair compare_functions( const std::shared_ptr& f1, const std::shared_ptr& f2, const bool compareConstValues = false, const bool compareNames = false, const bool compareRuntimeKeys = false, - const bool comparePrecisions = true); + const bool comparePrecisions = true, + const bool compareAttributes = false) { + auto fc = FunctionsComparator::no_default(); + + using Cmp = FunctionsComparator::CmpValues; + if (compareConstValues) fc.enable(Cmp::CONST_VALUES); + if (compareNames) fc.enable(Cmp::NAMES); + if (compareRuntimeKeys) fc.enable(Cmp::RUNTIME_KEYS); + if (comparePrecisions) fc.enable(Cmp::PRECISIONS); + if (compareAttributes) fc.enable(Cmp::ATTRIBUTES); + + const auto r = fc(f1, f2); + return {r.valid, r.message}; +} void check_rt_info(const std::shared_ptr& f); -template -std::vector> get(const std::shared_ptr& f) { - std::vector> nodes; - - std::queue> q; - for (const auto result : f->get_results()) { - q.push(result); - } - - while (!q.empty()) { - auto node = q.front(); - q.pop(); - - std::shared_ptr op = ngraph::as_type_ptr(node); - if (op != nullptr) { - nodes.push_back(op); - } - - for (size_t i = 0; i < node->inputs().size(); ++i) { - q.push(node->get_input_node_shared_ptr(i)); - } - } - - return nodes; -} - namespace ngraph { namespace pass { class InjectionPass; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp index d67f292d82344c..9b20c38b6e479f 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp @@ -23,5 +23,5 @@ class MockAsyncInferRequestDefault : public AsyncInferRequestThreadSafeDefault { const ITaskExecutor::Ptr &callbackExecutor) : AsyncInferRequestThreadSafeDefault(request, taskExecutor, callbackExecutor) {} - MOCK_METHOD0(StartAsync_ThreadUnsafe, void()); + MOCK_METHOD0(CheckBlob, void()); }; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp index e0a339c1cd3c80..56e597095f4732 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp @@ -27,10 +27,10 @@ class MockAsyncInferRequestInternal : public AsyncInferRequestInternal { MOCK_METHOD1(GetUserData, void(void **)); MOCK_METHOD1(SetUserData, void(void *)); MOCK_METHOD0(InferImpl, void()); - MOCK_CONST_METHOD1(GetPerformanceCounts, void(std::map &)); + MOCK_CONST_METHOD0(GetPerformanceCounts, std::map()); MOCK_METHOD1(setNetworkInputs, void(InputsDataMap)); MOCK_METHOD1(setNetworkOutputs, void(OutputsDataMap)); - MOCK_METHOD2(GetBlob, void(const char *name, Blob::Ptr &)); + MOCK_METHOD1(GetBlob, Blob::Ptr(const std::string&)); MOCK_METHOD1(SetCompletionCallback, void(IInferRequest::CompletionCallback)); MOCK_METHOD0(Cancel, InferenceEngine::StatusCode()); MOCK_METHOD0(Cancel_ThreadUnsafe, InferenceEngine::StatusCode()); diff --git 
a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp index 8c68e5630c3193..a98a26481b9df7 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp @@ -21,6 +21,7 @@ class MockInferRequestInternal : public InferRequestInternal { using InferRequestInternal::SetBlob; using InferRequestInternal::GetBlob; MOCK_METHOD0(InferImpl, void()); - MOCK_CONST_METHOD1(GetPerformanceCounts, void(std::map &)); + MOCK_CONST_METHOD0(GetPerformanceCounts, std::map()); + MOCK_METHOD0(checkBlobs, void()); MOCK_METHOD0(Cancel, InferenceEngine::StatusCode()); }; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp index 56d1bcd4611fef..342dd74f085bd4 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp @@ -20,11 +20,11 @@ class MockIAsyncInferRequestInternal : public InferenceEngine::IAsyncInferReques MOCK_METHOD1(GetUserData, void(void **)); MOCK_METHOD1(SetUserData, void(void *)); MOCK_METHOD0(Infer, void()); - MOCK_CONST_METHOD1(GetPerformanceCounts, void(std::map &)); - MOCK_METHOD2(SetBlob, void(const char *name, const InferenceEngine::Blob::Ptr &)); - MOCK_METHOD2(GetBlob, void(const char *name, InferenceEngine::Blob::Ptr &)); - MOCK_METHOD3(SetBlob, void(const char *name, const InferenceEngine::Blob::Ptr &, const InferenceEngine::PreProcessInfo&)); - MOCK_CONST_METHOD2(GetPreProcess, void(const char* name, const InferenceEngine::PreProcessInfo**)); + MOCK_CONST_METHOD0(GetPerformanceCounts, std::map()); + MOCK_METHOD2(SetBlob, void(const std::string&, const InferenceEngine::Blob::Ptr &)); + MOCK_METHOD1(GetBlob, InferenceEngine::Blob::Ptr(const std::string&)); + MOCK_METHOD3(SetBlob, void(const std::string&, const InferenceEngine::Blob::Ptr &, const InferenceEngine::PreProcessInfo&)); + MOCK_CONST_METHOD1(GetPreProcess, const InferenceEngine::PreProcessInfo&(const std::string&)); MOCK_METHOD1(SetCompletionCallback, void(InferenceEngine::IInferRequest::CompletionCallback)); MOCK_METHOD1(SetBatch, void(int)); MOCK_METHOD0(QueryState, std::vector()); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/builders.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/builders.hpp index 56ad51682b77f2..70f76db27f4a69 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/builders.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/builders.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -77,7 +77,8 @@ std::shared_ptr makeFakeQuantizeTypeRelaxed( std::shared_ptr makeFakeQuantize( const Output& input, const ngraph::element::Type precision, - const 
FakeQuantizeOnDataWithConstant& fqOnData); + const FakeQuantizeOnDataWithConstant& fqOnData, + const bool subgraphOnConstantPath = false); std::shared_ptr makeFakeQuantizeTypeRelaxed( const std::shared_ptr& input, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/constant.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/constant.hpp index af3a0adc855614..b16f4f0b985634 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/constant.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/constant.hpp @@ -4,6 +4,10 @@ #pragma once +#include +#include +#include + #include namespace ngraph { @@ -28,7 +32,22 @@ class Constant { }; inline std::ostream& operator<<(std::ostream& out, const Constant& constant) { - return out << "_" << constant.values << "_" << constant.outPrecision << "_" << constant.shape; + auto toStream = [](const std::vector& values) -> std::string { + std::stringstream os; + os << "{"; + for (size_t i = 0; i < values.size(); ++i) { + const float& value = values[i]; + if (i > 0) { + os << value; + } else { + os << ", " << value; + } + } + os << "}"; + return os.str(); + }; + + return out << "_" << toStream(constant.values) << "_" << constant.outPrecision << "_" << constant.shape; } } // namespace subgraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp index 482342cc1ff6b0..f6eb58929897ce 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -16,10 +16,15 @@ class DequantizationOperations { class Convert { public: Convert(); - Convert(const ngraph::element::Type outPrecision); + Convert(const ngraph::element::Type outPrecision, const bool addDeqAttr = true); bool empty() const noexcept; + bool equal(const DequantizationOperations::Convert& value) const noexcept; + bool operator==(const Convert& value) const noexcept { + return equal(value); + } - ngraph::element::Type outPrecision; + ngraph::element::Type outPrecision = element::undefined; + bool addDequantizationAttribute = true; private: bool isEmpty; }; @@ -36,17 +41,28 @@ class DequantizationOperations { const ngraph::Shape& constantShape, const bool addDequantizationAttribute = true, const size_t constantIndex = 1ul, - const ngraph::element::Type constantPrecision = ngraph::element::undefined); + const ngraph::element::Type constantPrecision = ngraph::element::undefined, + const bool addConvert = false, + const std::vector& attributes = {}, + const std::vector& convertAttributes = {}); bool empty() const noexcept; + bool equal(const DequantizationOperations::Subtract& value) const noexcept; + bool operator==(const Subtract& value) const noexcept { + return equal(value); + } + Subtract& setConstantPrecision(const ngraph::element::Type& precision); std::vector values; - ngraph::element::Type outPrecision; + ngraph::element::Type outPrecision = 
ngraph::element::undefined; ngraph::Shape constantShape; - bool constantShapeIsDefined; - bool addDequantizationAttribute; + bool constantShapeIsDefined = false; + bool addDequantizationAttribute = true; size_t constantIndex = 1ul; ngraph::element::Type constantPrecision = ngraph::element::undefined; + bool addConvert = false; + std::vector attributes; + std::vector convertAttributes; private: bool isEmpty; @@ -66,13 +82,17 @@ class DequantizationOperations { const size_t constantIndex = 1ul, const ngraph::element::Type constantPrecision = ngraph::element::undefined); bool empty() const noexcept; + bool equal(const DequantizationOperations::Multiply& value) const noexcept; + bool operator==(const Multiply& value) const noexcept { + return equal(value); + } Multiply& setConstantPrecision(const ngraph::element::Type& precision); std::vector values; - ngraph::element::Type outPrecision; + ngraph::element::Type outPrecision = ngraph::element::undefined; ngraph::Shape constantShape; - bool constantShapeIsDefined; - bool addDequantizationAttribute; + bool constantShapeIsDefined = false; + bool addDequantizationAttribute = true; size_t constantIndex = 1ul; ngraph::element::Type constantPrecision = ngraph::element::undefined; @@ -84,24 +104,46 @@ class DequantizationOperations { DequantizationOperations(const Convert& convert, const Subtract& subtract, const Multiply& multiply); - bool empty() const; + bool empty() const noexcept; + bool equal(const DequantizationOperations& value) const noexcept; + bool operator==(const DequantizationOperations& value) const noexcept { + return equal(value); + } Convert convert; Subtract subtract; Multiply multiply; }; -inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations& data) { +inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Convert& convert) { + return out << "_" << (convert.outPrecision != element::undefined ? convert.outPrecision.get_type_name() : ""); +} + +inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Subtract& subtract) { return out << "_" << - (data.convert.outPrecision != element::undefined ? 
data.convert.outPrecision.get_type_name() : "") << "_" << - data.subtract.values << "_" << - data.subtract.constantShape << "_" << - data.subtract.outPrecision << "_" << - data.subtract.constantIndex << "_" << - data.multiply.values << "_" << - data.multiply.constantShape << "_" << - data.multiply.outPrecision << "_" << - data.multiply.constantIndex; + subtract.values << "_" << + subtract.outPrecision << "_" << + subtract.constantShape << "_" << + subtract.constantShapeIsDefined << "_" << + subtract.addDequantizationAttribute << "_" << + subtract.constantIndex << "_" << + subtract.constantPrecision << "_" << + subtract.addConvert; +} + +inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Multiply& multiply) { + return out << "_" << + multiply.values << "_" << + multiply.outPrecision << "_" << + multiply.constantShape << "_" << + multiply.constantShapeIsDefined << "_" << + multiply.addDequantizationAttribute << "_" << + multiply.constantIndex << "_" << + multiply.constantPrecision; +} + +inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations& data) { + return out << "_" << data.convert << "_" << data.subtract << "_" << data.multiply; } } // namespace subgraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp index a81699cf5982b5..e6bd204889c09f 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -59,6 +59,21 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnData& dat class FakeQuantizeOnDataWithConstant { public: + FakeQuantizeOnDataWithConstant(); + + FakeQuantizeOnDataWithConstant( + const size_t quantizationLevel, + const std::vector& constantShapes, + const std::vector& inputLowValues, + const std::vector& inputHighValues, + const std::vector& outputLowValues, + const std::vector& outputHighValues, + const ngraph::element::Type outputPrecision = ngraph::element::undefined); + + virtual ~FakeQuantizeOnDataWithConstant(); + + virtual bool empty() const; + size_t quantizationLevel; std::vector constantShapes; std::vector inputLowValues; diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_weights.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_weights.hpp index e7f247a70e47dd..c21f70da8d1a54 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_weights.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_weights.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -32,7 +32,7 @@ class FakeQuantizeOnWeights: public FakeQuantizeOnData { }; inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnWeights& data) { - return out << "_" << data.constantShape << "_" << 
data.outputLowValues << "_" << data.outputHighValues; + return out << "_" << data.quantizationLevel << "_" << data.constantShape << "_" << data.outputLowValues << "_" << data.outputHighValues; } } // namespace subgraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/compose_fake_quantize_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/compose_fake_quantize_function.hpp new file mode 100644 index 00000000000000..c8026ca6e7e769 --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/compose_fake_quantize_function.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "ngraph_functions/subgraph_builders.hpp" + +namespace ngraph { +namespace builder { +namespace subgraph { + +class ComposeFakeQuantizeFunction { +public: + static std::shared_ptr get( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData, + const ngraph::builder::subgraph::DequantizationOperations& dequantization1, + const ngraph::builder::subgraph::DequantizationOperations& dequantization2); +}; + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index 60769b33d1184f..c6a0e8cee98a9d 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -89,13 +89,15 @@ class ConcatFunction { const FakeQuantizeOnData& fakeQuantize2, const DequantizationOperations& dequantizationOperations); - static std::shared_ptr getReference( + static std::shared_ptr get( const ngraph::element::Type inputPrecision, const ngraph::Shape& inputShape, const FakeQuantizeOnDataWithConstant& fakeQuantize1, + const DequantizationOperations::Convert& convert1, + const DequantizationOperations& dequantization1, const FakeQuantizeOnDataWithConstant& fakeQuantize2, - const ngraph::element::Type precisionBeforeOp, - const DequantizationOperations& dequantizationBefore, + const DequantizationOperations::Convert& convert2, + const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, const DequantizationOperations& dequantizationAfter); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp index 223c5b3a801f0b..ce12e3053f87eb 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp @@ -29,18 +29,22 @@ class ConvolutionFunction { const ngraph::Shape& inputShape, 
ngraph::element::Type precision, ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, - ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData, + ngraph::builder::subgraph::DequantizationOperations dequantization, bool isCrorrect); - static std::shared_ptr getReferenceWithIncorrectWeights( + static std::shared_ptr getOriginalWithIncorrectWeights( const ngraph::Shape& inputShape, ngraph::element::Type precision, - ngraph::element::Type dataPrecision, + ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData, + bool isCorrect); + + static std::shared_ptr getReferenceWithIncorrectWeights( + const ngraph::Shape& inputShape, + ngraph::element::Type inputPrecision, ngraph::builder::subgraph::DequantizationOperations dequantizationBefore, ngraph::element::Type weightsPrecision, std::vector weightsValues, - ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, ngraph::builder::subgraph::DequantizationOperations dequantizationAfter, bool isCorrect); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp index a12966bc4d2565..a1adec6bedfa1c 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_and_convolution_function.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,8 +6,12 @@ #include #include +#include + +#include "lpt_ngraph_functions/common/constant.hpp" #include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" #include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" namespace ngraph { namespace builder { @@ -16,11 +20,24 @@ namespace subgraph { class FakeQuantizeAndConvolutionFunction { public: // TODO: move to ConvolutionFunction - static std::shared_ptr getOriginal( + static std::shared_ptr get( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fakeQuantizeOnData, const FakeQuantizeOnWeights& fakeQuantizeOnWeights); + + static std::shared_ptr get( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnDataWithConstant& fakeQuantizeOnData, + const DequantizationOperations::Convert& convertOnData, + const DequantizationOperations& dequantizationOnData, + const Constant& constantOnWeights, + const FakeQuantizeOnWeights& fakeQuantizeOnWeights, + const DequantizationOperations::Convert& convertOnWeights, + const DequantizationOperations& dequantizationOnWeights, + const DequantizationOperations& dequantizationAfter, + const std::string operation = "Convolution"); }; } // namespace subgraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_on_weights_and_unsupported_child_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_on_weights_and_unsupported_child_function.hpp new file mode 100644 index 00000000000000..ea38f2366f87b3 --- /dev/null +++ 
b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_on_weights_and_unsupported_child_function.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" + +namespace ngraph { +namespace builder { +namespace subgraph { + +class FakeQuantizeOnWeightsAndUnsupportedChildFunction { +public: +static std::shared_ptr get( + const ngraph::Shape& inputShape, + const ngraph::element::Type inputPrecision, + const std::shared_ptr weights, + const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights); +}; + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/get_dequantization_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/get_dequantization_function.hpp index 4aba78c0d054d2..8c3cb89e4b7034 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/get_dequantization_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/get_dequantization_function.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,8 @@ #include #include #include +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" namespace ngraph { namespace builder { @@ -15,6 +17,18 @@ namespace subgraph { class GetDequantizationFunction { public: + static std::shared_ptr get( + const ngraph::element::Type& precision, + const Shape& shape, + const FakeQuantizeOnData& fakeQuantize, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore); + + static std::shared_ptr get( + const ngraph::element::Type& precision, + const Shape& shape, + const FakeQuantizeOnData& fakeQuantize, + const ngraph::pass::low_precision::FakeQuantizeDequantization& dequantization); + static std::shared_ptr getOriginal( bool isConvert, bool isSubtract, size_t subDataInput, size_t mulDataInput); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp index 0e842e57237ad0..05ab395b695943 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -34,14 +34,15 @@ class GroupConvolutionFunction { const FakeQuantizeOnData& fakeQuantizeOnData, const FakeQuantizeOnWeights& fakeQuantizeOnWeights); - static std::shared_ptr getReference( + static std::shared_ptr get( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, - const 
ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, + const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, const ngraph::element::Type precisionAfterDequantization); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/mat_mul_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/mat_mul_function.hpp index 672d72e09903d9..3e35297ecc3e26 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/mat_mul_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/mat_mul_function.hpp @@ -76,10 +76,10 @@ class MatMulFunction { static std::shared_ptr getOriginal( const ngraph::element::Type precision, const ngraph::Shape& inputShape, - const FakeQuantizeOnData& fqOnData, + const FakeQuantizeOnDataWithConstant& fqOnData, const ngraph::Shape& weightsConstShape, const std::vector& weightsConstValues, - const FakeQuantizeOnWeights& fqOnWeights); + const FakeQuantizeOnDataWithConstant& fqOnWeights); }; } // namespace subgraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp index 32a2a459f8b3d4..d14831be8c1eb3 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -41,7 +41,7 @@ std::shared_ptr AddFunction::getOriginal( broadcast ? ngraph::Shape({ inputShape[0], inputShape[1], 1, 1 }) : ngraph::Shape(inputShape)); } - const auto dequantizationOp1 = is_type(input1) ? input1 : makeDequantization(input1, dequantization1); + const auto dequantizationOp1 = dequantization1.empty() ? input1 : makeDequantization(input1, dequantization1); std::shared_ptr input2; if (constInput == 1) { @@ -85,11 +85,11 @@ std::shared_ptr AddFunction::getOriginal( parent, std::make_shared(element::f32, Shape{ 1, 1, 1, 1 }, std::vector{1.f})); parent = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed( - parent, - ngraph::element::f32, - {256, Shape{}, { 0 }, { 255 }, { 0 }, { 255 }, element::u8}); + parent, + ngraph::element::f32, + FakeQuantizeOnData{256, Shape{}, { 0 }, { 255 }, { 0 }, { 255 }, element::u8}); } - const auto dequantizationOp2 = is_type(parent) ? parent : makeDequantization(parent, dequantization2); + const auto dequantizationOp2 = dequantization2.empty() ? 
parent : makeDequantization(parent, dequantization2); const auto add = std::make_shared(dequantizationOp1, dequantizationOp2); add->set_friendly_name("output"); @@ -218,9 +218,9 @@ std::shared_ptr AddFunction::getReference( parent, std::make_shared(element::f32, Shape{ 1, 1, 1, 1 }, std::vector{1.f})); parent = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed( - parent, - ngraph::element::f32, - {256, Shape{}, { 0 }, { 255 }, { 0 }, { 255 }, element::u8}); + parent, + ngraph::element::f32, + FakeQuantizeOnData{256, Shape{}, { 0 }, { 255 }, { 0 }, { 255 }, element::u8}); } const auto dequantizationOp2 = is_type(parent) ? parent : makeDequantization(parent, dequantization2); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp index c7806ac8c94d27..19e243fec95468 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -22,9 +22,9 @@ std::shared_ptr makeDequantization( Output parent = data; if (!dequantizationOperations.convert.empty()) { - std::shared_ptr convert = std::make_shared( - data, - dequantizationOperations.convert.outPrecision); + std::shared_ptr convert = dequantizationOperations.convert.addDequantizationAttribute ? + std::make_shared(data, dequantizationOperations.convert.outPrecision) : + std::make_shared(data, dequantizationOperations.convert.outPrecision); ngraph::copy_runtime_info({ data.get_node_shared_ptr(), convert }, convert); parent = convert; } @@ -33,8 +33,12 @@ std::shared_ptr makeDequantization( std::shared_ptr subtract; std::vector shape; + auto values = dequantizationOperations.subtract.values; if (dequantizationOperations.subtract.constantShapeIsDefined) { shape = dequantizationOperations.subtract.constantShape; + if (values.size() == 1ul) { + values = std::vector(shape_size(shape), values[0]); + } } else { if (dequantizationOperations.subtract.values.size() == 1ul) { shape = std::vector({}); @@ -44,12 +48,25 @@ std::shared_ptr makeDequantization( } } - const auto subtractConst = std::make_shared( + std::shared_ptr subtractConst = std::make_shared( dequantizationOperations.subtract.constantPrecision != element::undefined ? dequantizationOperations.subtract.constantPrecision : parent.get_element_type(), shape, - dequantizationOperations.subtract.values); + values); + + if (dequantizationOperations.subtract.addConvert) { + std::shared_ptr subtractConstConvert = std::make_shared( + subtractConst, + dequantizationOperations.subtract.outPrecision); + + auto& rt = subtractConstConvert->get_rt_info(); + for (const std::string& attribute : dequantizationOperations.subtract.convertAttributes) { + rt[attribute] = std::make_shared>(""); + } + + subtractConst = subtractConstConvert; + } Output leftBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? parent : subtractConst; Output rightBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? 
subtractConst : parent; @@ -58,32 +75,72 @@ std::shared_ptr makeDequantization( (dequantizationOperations.subtract.outPrecision == parent.get_element_type())) && ((dequantizationOperations.subtract.constantPrecision == element::undefined) || (dequantizationOperations.subtract.constantPrecision == parent.get_element_type()))) { - subtract = std::make_shared(leftBranchParent, rightBranchParent); + subtract = dequantizationOperations.subtract.addDequantizationAttribute ? + std::make_shared(parent, subtractConst) : + std::make_shared(parent, subtractConst); } else { - subtract = std::make_shared>( - std::vector{element::f32, element::f32}, - std::vector{ element::f32 }, - ngraph::op::TemporaryReplaceOutputType(leftBranchParent, element::f32).get(), - ngraph::op::TemporaryReplaceOutputType(rightBranchParent, element::f32).get()); + // TODO: use templates + if (dequantizationOperations.subtract.addDequantizationAttribute) { + if (dequantizationOperations.subtract.constantIndex == 1ul) { + subtract = std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get()); + } else { + subtract = std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get()); + } + } else { + if (dequantizationOperations.subtract.constantIndex == 1ul) { + subtract = std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get()); + } else { + subtract = std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get()); + } + } + ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(subtract, dequantizationOperations.subtract.outPrecision); } if (!dequantizationOperations.subtract.addDequantizationAttribute) { ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(subtract); } ngraph::copy_runtime_info({ data.get_node_shared_ptr(), subtract }, subtract); + + if (!dequantizationOperations.subtract.attributes.empty()) { + auto& rt = subtract->get_rt_info(); + for (const std::string& attribute : dequantizationOperations.subtract.attributes) { + rt[attribute] = std::make_shared>(""); + } + } + parent = subtract; } if (!dequantizationOperations.multiply.empty()) { std::vector shape; + auto values = dequantizationOperations.multiply.values; if (dequantizationOperations.multiply.constantShapeIsDefined) { shape = dequantizationOperations.multiply.constantShape; + if (values.size() == 1ul) { + values = std::vector(shape_size(shape), values[0]); + } } else { - if (dequantizationOperations.multiply.values.size() == 1ul) { + if (values.size() == 1ul) { shape = std::vector({}); } else { shape = std::vector(parent.get_shape().size(), 1ul); - shape[shape.size() >= 2 ? 1ul : 0] = dequantizationOperations.multiply.values.size(); + shape[shape.size() >= 2 ? 
1ul : 0] = values.size(); } } @@ -97,30 +154,51 @@ std::shared_ptr makeDequantization( dequantizationOperations.multiply.constantPrecision : parent.get_element_type(), shape, - dequantizationOperations.multiply.values); + values); - multiply = dequantizationOperations.multiply.constantIndex == 1ul ? - std::make_shared(parent, constant) : - std::make_shared(constant, parent); + if (dequantizationOperations.multiply.addDequantizationAttribute) { + multiply = dequantizationOperations.multiply.constantIndex == 1ul ? + std::make_shared(parent, constant) : + std::make_shared(constant, parent); + } else { + multiply = dequantizationOperations.multiply.constantIndex == 1ul ? + std::make_shared(parent, constant) : + std::make_shared(constant, parent); + } } else { const std::shared_ptr constant = std::make_shared( dequantizationOperations.multiply.constantPrecision != element::undefined ? dequantizationOperations.multiply.constantPrecision : parent.get_element_type(), shape, - dequantizationOperations.multiply.values); - - multiply = dequantizationOperations.multiply.constantIndex == 1ul ? - std::make_shared>( - std::vector{element::f32, element::f32}, - std::vector{ element::f32 }, - ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), - ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) : - std::make_shared>( - std::vector{element::f32, element::f32}, - std::vector{ element::f32 }, - ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(), - ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get()); + values); + + // TODO: use templates + if (dequantizationOperations.multiply.addDequantizationAttribute) { + multiply = dequantizationOperations.multiply.constantIndex == 1ul ? + std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) : + std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get()); + } else { + multiply = dequantizationOperations.multiply.constantIndex == 1ul ? + std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) : + std::make_shared>( + std::vector{element::f32, element::f32}, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get()); + } } ngraph::copy_runtime_info({ data.get_node_shared_ptr(), multiply }, multiply); parent = multiply; @@ -155,28 +233,57 @@ std::shared_ptr makeFakeQuantizeTypeRelaxed( std::shared_ptr makeFakeQuantize( const Output& input, const ngraph::element::Type precision, - const FakeQuantizeOnDataWithConstant& fqOnData) { - const auto inputLowNode = ngraph::builder::makeConstant( - precision, - fqOnData.constantShapes.empty() ? 
ngraph::Shape{} : fqOnData.constantShapes[0], - fqOnData.inputLowValues, - fqOnData.inputLowValues.empty()); + const FakeQuantizeOnDataWithConstant& fqOnData, + const bool subgraphOnConstantPath) { + std::shared_ptr inputLowNode; + std::shared_ptr inputHighNode; - const auto inputHighNode = ngraph::builder::makeConstant( - precision, - fqOnData.constantShapes.empty() ? ngraph::Shape{} : fqOnData.constantShapes[1], - fqOnData.inputHighValues, - fqOnData.inputHighValues.empty()); + if (subgraphOnConstantPath) { + const auto topConstant = ngraph::builder::makeConstant(precision, ngraph::Shape{1}, std::vector(1, 0.f), false); + const auto convert = std::make_shared(topConstant, element::f32); + + const auto subtractMin = std::make_shared( + std::make_shared(precision, ngraph::Shape{ 1 }, std::vector{fqOnData.outputLowValues[0]}), + convert); + const auto subtractMax = std::make_shared( + std::make_shared(precision, ngraph::Shape{ 1 }, std::vector{fqOnData.outputHighValues[0]}), + convert); + + inputLowNode = std::make_shared( + std::make_shared(precision, ngraph::Shape{ 1 }, std::vector{fqOnData.inputLowValues[0] / fqOnData.outputLowValues[0]}), + subtractMin); + inputHighNode = std::make_shared( + std::make_shared(precision, ngraph::Shape{ 1 }, std::vector{fqOnData.inputHighValues[0] / fqOnData.outputHighValues[0]}), + subtractMax); + } else { + inputLowNode = ngraph::builder::makeConstant( + precision, + fqOnData.constantShapes.empty() ? ngraph::Shape{} : fqOnData.constantShapes[0], + fqOnData.inputLowValues, + fqOnData.inputLowValues.empty()); + + inputHighNode = ngraph::builder::makeConstant( + precision, + fqOnData.constantShapes.empty() ? + ngraph::Shape{} : + (fqOnData.constantShapes.size() == 1 ? fqOnData.constantShapes[0] : fqOnData.constantShapes[1]), + fqOnData.inputHighValues, + fqOnData.inputHighValues.empty()); + } const auto outputLowNode = ngraph::builder::makeConstant( precision, - fqOnData.constantShapes.empty() ? ngraph::Shape{} : fqOnData.constantShapes[2], + fqOnData.constantShapes.empty() ? + ngraph::Shape{} : + (fqOnData.constantShapes.size() == 1 ? fqOnData.constantShapes[0] : fqOnData.constantShapes[2]), fqOnData.outputLowValues, fqOnData.outputLowValues.empty()); const auto outputHighNode = ngraph::builder::makeConstant( precision, - fqOnData.constantShapes.empty() ? ngraph::Shape{} : fqOnData.constantShapes[3], + fqOnData.constantShapes.empty() ? + ngraph::Shape{} : + (fqOnData.constantShapes.size() == 1 ? 
fqOnData.constantShapes[0] : fqOnData.constantShapes[3]), fqOnData.outputHighValues, fqOnData.outputHighValues.empty()); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/dequantization_operations.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/dequantization_operations.cpp index f19ee4fd762307..e079831ab98a77 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/dequantization_operations.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/dequantization_operations.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -14,15 +14,20 @@ DequantizationOperations::Convert::Convert() : outPrecision(ngraph::element::undefined) {} -DequantizationOperations::Convert::Convert(const ngraph::element::Type outPrecision) : +DequantizationOperations::Convert::Convert(const ngraph::element::Type outPrecision, const bool addDeqAttr) : isEmpty(false), - outPrecision(outPrecision) + outPrecision(outPrecision), + addDequantizationAttribute(addDeqAttr) {} bool DequantizationOperations::Convert::empty() const noexcept { return isEmpty; } +bool DequantizationOperations::Convert::equal(const DequantizationOperations::Convert& value) const noexcept { + return (this->outPrecision == value.outPrecision) && (this->addDequantizationAttribute == value.addDequantizationAttribute); +} + DequantizationOperations::Subtract::Subtract() : isEmpty(true), outPrecision(ngraph::element::undefined), @@ -61,7 +66,10 @@ DequantizationOperations::Subtract::Subtract( const ngraph::Shape& constantShape, const bool addDequantizationAttribute, const size_t constantIndex, - const ngraph::element::Type constantPrecision) : + const ngraph::element::Type constantPrecision, + const bool addConvert, + const std::vector& attributes, + const std::vector& convertAttributes) : isEmpty(false), values(values), outPrecision(outPrecision), @@ -69,13 +77,26 @@ DequantizationOperations::Subtract::Subtract( constantShapeIsDefined(true), addDequantizationAttribute(addDequantizationAttribute), constantIndex(constantIndex), - constantPrecision(constantPrecision) { + constantPrecision(constantPrecision), + addConvert(addConvert), + attributes(attributes), + convertAttributes(convertAttributes) { } bool DequantizationOperations::Subtract::empty() const noexcept { return isEmpty; } +bool DequantizationOperations::Subtract::equal(const DequantizationOperations::Subtract& value) const noexcept { + return + (values == value.values) && + (outPrecision == value.outPrecision) && + (constantShape == value.constantShape) && + (constantShapeIsDefined == value.constantShapeIsDefined) && + (addDequantizationAttribute == value.addDequantizationAttribute) && + (constantIndex == value.constantIndex); +} + DequantizationOperations::Subtract& DequantizationOperations::Subtract::setConstantPrecision(const ngraph::element::Type& precision) { constantPrecision = precision; return *this; @@ -129,6 +150,25 @@ bool DequantizationOperations::Multiply::empty() const noexcept { return isEmpty; } +bool DequantizationOperations::Multiply::equal(const DequantizationOperations::Multiply& value) const noexcept { + return + (values == value.values) && + (outPrecision == value.outPrecision) && + (constantShape == value.constantShape) && + (addDequantizationAttribute == value.addDequantizationAttribute) && + (constantIndex == value.constantIndex) && + (constantPrecision == 
value.constantPrecision) && + (constantShapeIsDefined == value.constantShapeIsDefined); +} + +bool DequantizationOperations::equal(const DequantizationOperations& value) const noexcept { + return + (empty() == value.empty()) && + (convert == value.convert) && + (subtract == value.subtract) && + (multiply == value.multiply); +} + DequantizationOperations::Multiply& DequantizationOperations::Multiply::setConstantPrecision(const ngraph::element::Type& precision) { constantPrecision = precision; return *this; @@ -145,7 +185,7 @@ DequantizationOperations::DequantizationOperations( multiply(multiply) {} -bool DequantizationOperations::empty() const { +bool DequantizationOperations::empty() const noexcept { return convert.empty() && subtract.empty() && multiply.empty(); } diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp index 8412983008fe64..1a09292295ab38 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -44,6 +44,38 @@ bool FakeQuantizeOnData::empty() const { outputHighValues.empty(); } +FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant() : + quantizationLevel(0), + outputPrecision(ngraph::element::undefined) {} + +FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant( + const size_t quantizationLevel, + const std::vector& constantShapes, + const std::vector& inputLowValues, + const std::vector& inputHighValues, + const std::vector& outputLowValues, + const std::vector& outputHighValues, + const ngraph::element::Type outputPrecision) : + quantizationLevel(quantizationLevel), + constantShapes(constantShapes), + inputLowValues(inputLowValues), + inputHighValues(inputHighValues), + outputLowValues(outputLowValues), + outputHighValues(outputHighValues), + outputPrecision(outputPrecision) +{} + +FakeQuantizeOnDataWithConstant::~FakeQuantizeOnDataWithConstant() {} + +bool FakeQuantizeOnDataWithConstant::empty() const { + return (quantizationLevel == 0ul) && + constantShapes.empty() && + inputLowValues.empty() && + inputHighValues.empty() && + outputLowValues.empty() && + outputHighValues.empty(); +} + } // namespace subgraph } // namespace builder } // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/compose_fake_quantize_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/compose_fake_quantize_function.cpp new file mode 100644 index 00000000000000..87235ec378f84c --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/compose_fake_quantize_function.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "lpt_ngraph_functions/compose_fake_quantize_function.hpp" +#include "low_precision/network_helper.hpp" + +#include +#include "ngraph_functions/subgraph_builders.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" + +using namespace ngraph::pass::low_precision; + +namespace ngraph { +namespace builder { +namespace subgraph { + + std::shared_ptr ComposeFakeQuantizeFunction::get( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const 
ngraph::builder::subgraph::FakeQuantizeOnData& fqOnData, + const ngraph::builder::subgraph::DequantizationOperations& dequantization1, + const ngraph::builder::subgraph::DequantizationOperations& dequantization2) { + const auto input = std::make_shared(precision, inputShape); + + auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData); + + auto results = ngraph::ResultVector{}; + if (dequantization1.empty() && dequantization2.empty()) { + results.push_back(std::make_shared(fakeQuantize)); + } else { + if (!dequantization1.empty()) { + const auto deq = makeDequantization(fakeQuantize, dequantization1); + results.push_back(std::make_shared(deq)); + } + if (!dequantization2.empty()) { + const auto deq = makeDequantization(fakeQuantize, dequantization2); + results.push_back(std::make_shared(deq)); + } + } + + return std::make_shared(results, ngraph::ParameterVector{ input }, "ComposeFakeQuantizeFunction"); + } + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index ab6c8d9728a02d..d8c95466b56f9d 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -590,35 +590,47 @@ std::shared_ptr ConcatFunction::getReference( return function; } -std::shared_ptr ConcatFunction::getReference( +std::shared_ptr ConcatFunction::get( const ngraph::element::Type inputPrecision, const ngraph::Shape& inputShape, const FakeQuantizeOnDataWithConstant& fqOnData1, + const DequantizationOperations::Convert& convert1, + const DequantizationOperations& dequantization1, const FakeQuantizeOnDataWithConstant& fqOnData2, - const ngraph::element::Type precisionBeforeOp, - const DequantizationOperations& dequantizationBefore, + const DequantizationOperations::Convert& convert2, + const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, const DequantizationOperations& dequantizationAfter) { const auto input1 = std::make_shared(inputPrecision, inputShape); input1->set_friendly_name("input1"); - const auto fakeQuantize1 = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(input1, inputPrecision, fqOnData1); - low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize1, precisionBeforeOp); - const auto deqBefore1 = makeDequantization(fakeQuantize1, dequantizationBefore); + std::shared_ptr parent1 = makeFakeQuantizeTypeRelaxed(input1, inputPrecision, fqOnData1); + if (!convert1.empty()) { + parent1 = std::make_shared(parent1, convert1.outPrecision); + } + if (!dequantization1.empty()) { + parent1 = makeDequantization(parent1, dequantization1); + } const auto input2 = std::make_shared(inputPrecision, inputShape); input2->set_friendly_name("input2"); - const auto fakeQuantize2 = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(input2, inputPrecision, fqOnData2); - low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize2, precisionBeforeOp); - const auto deqBefore2 = makeDequantization(fakeQuantize2, dequantizationBefore); + std::shared_ptr parent2 = makeFakeQuantizeTypeRelaxed(input2, inputPrecision, fqOnData2); + if (!convert2.empty()) { + parent2 = std::make_shared(parent2, convert2.outPrecision); + } + if (!dequantization2.empty()) { + parent2 = makeDequantization(parent2, dequantization2); + } 
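// At this point each branch is FakeQuantize -> optional Convert -> optional dequantization
// (parent1 / parent2). The two branches are concatenated along axis 1 below; the optional
// precisionAfterOperation (when not undefined) is applied to the concat result, and the
// result of dequantizationAfter is given the friendly name "output".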
const std::shared_ptr concat = std::make_shared>( - ngraph::OutputVector{ deqBefore1, deqBefore2 }, 1); + ngraph::OutputVector{ parent1, parent2 }, 1); auto& rtInfo = concat->get_rt_info(); rtInfo["Variant::std::string"] = std::make_shared>("concat"); - ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(concat, precisionAfterOperation); + if (precisionAfterOperation != ngraph::element::undefined) { + ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(concat, precisionAfterOperation); + } const auto lastDequantization = makeDequantization(concat, dequantizationAfter); lastDequantization->set_friendly_name("output"); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp index b7f678a5f92990..70647687cbed91 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp @@ -73,6 +73,44 @@ std::shared_ptr ConvolutionFunction::getOriginal( return std::make_shared(results, ngraph::ParameterVector{ input }, "ConvolutionTransformation"); } +std::shared_ptr ConvolutionFunction::getOriginalWithIncorrectWeights( + const ngraph::Shape& inputShape, + ngraph::element::Type precision, + ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, + ngraph::builder::subgraph::DequantizationOperations dequantization, + bool isCorrect) { + const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + const auto deq = makeDequantization(input, dequantization); + + const size_t inputChannelsCount = inputShape[1]; + const size_t outputChannelsCount = 2 * inputShape[1]; + const auto weights = ngraph::opset1::Constant::create( + ngraph::element::f32, + ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 }, + std::vector(outputChannelsCount * inputChannelsCount, 1)); + + const auto fqOnWeights = fakeQuantizeOnWeights.empty() ? + nullptr : + ngraph::builder::makeFakeQuantize( + weights, ngraph::element::f32, fakeQuantizeOnWeights.quantizationLevel, fakeQuantizeOnWeights.constantShape, + fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues, + fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues); + + const auto subtract = isCorrect ? nullptr : std::make_shared(fqOnWeights, + std::make_shared(ngraph::element::f32, Shape{1, 1, 1, 1}, 3.0f)); + + const auto convolution = std::make_shared( + deq, + isCorrect ? fqOnWeights : subtract, + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + + ngraph::ResultVector results{ std::make_shared(convolution) }; + return std::make_shared(results, ngraph::ParameterVector{ input }, "IncorrectWeightsAndConvolutionFunction"); +} + std::shared_ptr ConvolutionFunction::getOriginalWithIncorrectWeights( const ngraph::Shape& inputShape, ngraph::element::Type precision, @@ -101,7 +139,7 @@ std::shared_ptr ConvolutionFunction::getOriginalWithIncorrectW fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues); const auto subtract = isCorrect ? nullptr : std::make_shared(fqOnWeights, - std::make_shared(ngraph::element::f32, Shape{1, 1, 1, 1}, 3.0f)); + std::make_shared(ngraph::element::f32, Shape{ 1, 1, 1, 1 }, 3.0f)); const auto convolution = std::make_shared( fakeQuantizeOnData.empty() ? 
input : fqOnData, @@ -117,31 +155,16 @@ std::shared_ptr ConvolutionFunction::getOriginalWithIncorrectW std::shared_ptr ConvolutionFunction::getReferenceWithIncorrectWeights( const ngraph::Shape& inputShape, - ngraph::element::Type precision, - ngraph::element::Type dataPrecision, - ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData, + ngraph::element::Type inputPrecision, ngraph::builder::subgraph::DequantizationOperations dequantizationBefore, ngraph::element::Type weightsPrecision, std::vector weightsValues, - ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, ngraph::builder::subgraph::DequantizationOperations dequantizationAfter, bool isCorrect) { - const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + const auto input = std::make_shared(inputPrecision, ngraph::Shape(inputShape)); input->set_friendly_name("input"); - std::shared_ptr fqOnData = as_type_ptr(ngraph::builder::makeFakeQuantize( - input, - precision, - fakeQuantizeOnData.quantizationLevel, - fakeQuantizeOnData.constantShape, - fakeQuantizeOnData.inputLowValues, - fakeQuantizeOnData.inputHighValues, - fakeQuantizeOnData.outputLowValues, - fakeQuantizeOnData.outputHighValues)); - - ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(fqOnData, dataPrecision); - - const auto deqBefore = dequantizationBefore.empty() ? nullptr : makeDequantization(fqOnData, dequantizationBefore); + const auto deqBefore = makeDequantization(input, dequantizationBefore); const size_t inputChannelsCount = inputShape[1]; const size_t outputChannelsCount = 2 * inputShape[1]; @@ -151,24 +174,17 @@ std::shared_ptr ConvolutionFunction::getReferenceWithIncorrect } const std::shared_ptr weights = ngraph::opset1::Constant::create( - precision, + weightsPrecision, ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 }, weightsValues.size() == 1ul ? std::vector(outputChannelsCount * inputChannelsCount, weightsValues[0]) : weightsValues); - const auto fqOnWeights = fakeQuantizeOnWeights.empty() ? - nullptr : - ngraph::builder::makeFakeQuantize( - weights, precision, fakeQuantizeOnWeights.quantizationLevel, fakeQuantizeOnWeights.constantShape, - fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues, - fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues); - - const auto subtract = isCorrect ? nullptr : std::make_shared(fqOnWeights, - std::make_shared(precision, Shape{ 1, 1, 1, 1 }, 3.0f)); + const auto subtract = isCorrect ? nullptr : std::make_shared(weights, + std::make_shared(ngraph::element::f32, Shape{ 1, 1, 1, 1 }, 3.0f)); auto convolutionOriginal = ngraph::opset1::Convolution( - ngraph::op::TemporaryReplaceOutputType(dequantizationBefore.empty() ? fqOnData : deqBefore, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(deqBefore, element::f32).get(), ngraph::op::TemporaryReplaceOutputType(isCorrect ? weights : subtract, element::f32).get(), ngraph::Strides{ 1, 1 }, ngraph::CoordinateDiff{ 0, 0 }, @@ -180,35 +196,9 @@ std::shared_ptr ConvolutionFunction::getReferenceWithIncorrect std::vector{ element::f32, element::f32 }, std::vector{}); - std::shared_ptr multiply; - if (!dequantizationAfter.multiply.empty()) { - ngraph::Shape constShape = isCorrect ? 
Shape{ 1, 1, 1 } : Shape{ 1, 1, 1, 1 }; - multiply = std::make_shared(convolution, - std::make_shared(precision, constShape, dequantizationAfter.multiply.values[0])); - } - - replace_node(fqOnData->get_input_node_shared_ptr(3), - std::make_shared(precision, Shape{}, fakeQuantizeOnData.outputLowValues[0])); - - replace_node(fqOnData->get_input_node_shared_ptr(4), - std::make_shared(precision, Shape{}, fakeQuantizeOnData.outputHighValues[0])); - - ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(fqOnData, dataPrecision); - - if (!dequantizationBefore.multiply.empty()) { - ngraph::Shape constShape = isCorrect ? Shape{ 1, 1, 1 } : Shape{ 1, 1, 1, 1 }; - replace_node( - deqBefore->get_input_node_shared_ptr(1), - std::make_shared(precision, constShape, dequantizationBefore.multiply.values[0])); - } - - if (isCorrect) { - replace_node( - weights, - ngraph::pass::low_precision::fold(weights, weightsPrecision)); - } + const auto deqAfter = makeDequantization(convolution, dequantizationAfter); - ngraph::ResultVector results{ std::make_shared(dequantizationAfter.empty() ? convolution : multiply) }; + ngraph::ResultVector results{ std::make_shared(deqAfter) }; return std::make_shared(results, ngraph::ParameterVector{ input }, "IncorrectWeightsAndConvolutionFunction"); } diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_and_convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_and_convolution_function.cpp index 8013dfd476084a..d4587ae2867f49 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_and_convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_and_convolution_function.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,12 +6,15 @@ #include #include "ngraph_functions/subgraph_builders.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" +#include "inference_engine.hpp" namespace ngraph { namespace builder { namespace subgraph { -std::shared_ptr FakeQuantizeAndConvolutionFunction::getOriginal( +// TODO: remove, reuse mode extended method +std::shared_ptr FakeQuantizeAndConvolutionFunction::get( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fqOnData, @@ -47,6 +50,103 @@ std::shared_ptr FakeQuantizeAndConvolutionFunction::getOrigina return std::make_shared(results, ngraph::ParameterVector{ input }, "FakeQuantizeAndConvolutionFunction"); } +std::shared_ptr FakeQuantizeAndConvolutionFunction::get( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnDataWithConstant& fqOnData, + const DequantizationOperations::Convert& convertOnData, + const DequantizationOperations& dequantizationOnData, + const Constant& constantOnWeights, + const FakeQuantizeOnWeights& fqOnWeights, + const DequantizationOperations::Convert& convertOnWeights, + const DequantizationOperations& dequantizationOnWeights, + const DequantizationOperations& dequantizationAfter, + const std::string operation) { + const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + + std::shared_ptr parentOnActivation = input; + { + if (!fqOnData.empty()) { + parentOnActivation = fqOnData.outputPrecision == element::undefined ? 
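// Illustrative sketch only (not part of the patch): how an LPT transformation test
// might pair the reworked ConvolutionFunction builders above. All shapes, precisions,
// FakeQuantize intervals and dequantization scales below are hypothetical, and the
// brace-initializers assume the usual LPT helper constructors (level/shape/intervals
// for FakeQuantize, convert/subtract/multiply for DequantizationOperations).
const ngraph::Shape inputShape{ 1, 3, 16, 16 };

// "actual" graph: dequantization on activations, FakeQuantize (plus an extra
// Subtract when isCorrect == false) on weights
const auto actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginalWithIncorrectWeights(
    inputShape,
    ngraph::element::u8,
    { 255ul, ngraph::Shape{ 1, 1, 1, 1 }, { -1.27f }, { 1.27f }, { -1.27f }, { 1.27f } },  // fakeQuantizeOnWeights
    { { ngraph::element::f32 }, {}, { 0.02f } },                                           // dequantization on data
    false);                                                                                // isCorrect

// "reference" graph: what the transformation is expected to leave behind
const auto referenceFunction = ngraph::builder::subgraph::ConvolutionFunction::getReferenceWithIncorrectWeights(
    inputShape,
    ngraph::element::u8,
    { { ngraph::element::f32 }, {}, { 0.02f } },  // dequantizationBefore
    ngraph::element::f32,                         // weightsPrecision
    { 1.f },                                      // weightsValues
    {},                                           // dequantizationAfter
    false);                                       // isCorrect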
+ ngraph::builder::subgraph::makeFakeQuantize(input, precision, fqOnData) : + ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(input, precision, fqOnData); + } + + if (!convertOnData.empty()) { + parentOnActivation = std::make_shared(parentOnActivation, convertOnData.outPrecision); + } + + if (!dequantizationOnData.empty()) { + parentOnActivation = makeDequantization(parentOnActivation, dequantizationOnData); + } + } + + std::shared_ptr parentOnWeights; + { + const size_t inputChannelsCount = inputShape[1]; + const size_t outputChannelsCount = 2 * inputShape[1]; + const Shape shape = constantOnWeights.shapeIsDefined ? constantOnWeights.shape : ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 }; + parentOnWeights = ngraph::opset1::Constant::create( + constantOnWeights.outPrecision, + shape, + constantOnWeights.values.size() != ngraph::shape_size(shape) ? + std::vector(outputChannelsCount * inputChannelsCount, constantOnWeights.values[0]) : + constantOnWeights.values); + + if (!fqOnWeights.empty()) { + parentOnWeights = fqOnWeights.outputPrecision == element::undefined ? + ngraph::builder::subgraph::makeFakeQuantize(parentOnWeights, parentOnWeights->output(0).get_element_type(), fqOnWeights) : + ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(parentOnWeights, parentOnWeights->output(0).get_element_type(), fqOnWeights); + } + + if (!convertOnWeights.empty()) { + parentOnWeights = std::make_shared(parentOnWeights, convertOnWeights.outPrecision); + } + + if (!dequantizationOnWeights.empty()) { + parentOnWeights = makeDequantization(parentOnWeights, dequantizationOnWeights); + } + } + + std::shared_ptr lastOperation; + if (operation == "Convolution") { + lastOperation = std::make_shared>( + ngraph::opset1::Convolution( + ngraph::op::TemporaryReplaceOutputType(parentOnActivation, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parentOnWeights, element::f32).get(), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }), + std::vector{ element::f32, element::f32 }, + std::vector{}); + } else if (operation == "GroupConvolution") { + std::make_shared>( + ngraph::opset1::GroupConvolution( + ngraph::op::TemporaryReplaceOutputType(parentOnActivation, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(parentOnWeights, element::f32).get(), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }), + std::vector{ element::f32, element::f32 }, + std::vector{}); + } else { + THROW_IE_EXCEPTION << "unknown operation type " << operation; + } + + if (!dequantizationAfter.empty()) { + lastOperation->set_friendly_name("output_original"); + lastOperation = makeDequantization(lastOperation, dequantizationAfter); + lastOperation->set_friendly_name("output"); + } else { + lastOperation->set_friendly_name("output"); + } + + ngraph::ResultVector results{ std::make_shared(lastOperation) }; + return std::make_shared(results, ngraph::ParameterVector{ input }, "FakeQuantizeAndConvolutionFunction"); +} + } // namespace subgraph } // namespace builder } // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_on_weights_and_unsupported_child_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_on_weights_and_unsupported_child_function.cpp new file mode 100644 index 00000000000000..80630d0e3a01d8 --- /dev/null +++ 
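// Background sketch for the TypeRelaxed pattern used by the helper above and by the
// other LPT builders in this patch, with the template arguments spelled out:
// TemporaryReplaceOutputType presents a parent output as f32 while the convolution is
// validated, and TypeRelaxed<> then restores the real element types, so a Convolution
// can be built directly on u8/i8 parents. Shapes and types below are hypothetical.
const auto activations = std::make_shared<ngraph::opset1::Parameter>(
    ngraph::element::u8, ngraph::Shape{ 1, 3, 16, 16 });
const auto int8Weights = ngraph::opset1::Constant::create(
    ngraph::element::i8, ngraph::Shape{ 6, 3, 1, 1 }, std::vector<int8_t>(18, 1));
const auto relaxedConvolution = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Convolution>>(
    ngraph::opset1::Convolution(
        ngraph::op::TemporaryReplaceOutputType(activations, ngraph::element::f32).get(),
        ngraph::op::TemporaryReplaceOutputType(int8Weights, ngraph::element::f32).get(),
        ngraph::Strides{ 1, 1 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::Strides{ 1, 1 }),
    std::vector<ngraph::element::Type>{ ngraph::element::f32, ngraph::element::f32 },
    std::vector<ngraph::element::Type>{});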
b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_on_weights_and_unsupported_child_function.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include +#include "lpt_ngraph_functions/common/builders.hpp" +#include "lpt_ngraph_functions/fake_quantize_on_weights_and_unsupported_child_function.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" +#include "low_precision/network_helper.hpp" +#include "ngraph_functions/builders.hpp" + + +namespace ngraph { +namespace builder { +namespace subgraph { +std::shared_ptr FakeQuantizeOnWeightsAndUnsupportedChildFunction::get( + const ngraph::Shape& inputShape, + const ngraph::element::Type inputPrecision, + const std::shared_ptr weights, + const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights) { + const auto input = std::make_shared(inputPrecision, inputShape); + input->set_friendly_name("Input"); + weights->set_friendly_name("Weights"); + + std::shared_ptr weightsParent = weights; + if (!fqOnWeights.empty()) { + const auto fakeQuantizeOnWeights = makeFakeQuantize(weights, inputPrecision, fqOnWeights); + fakeQuantizeOnWeights->set_friendly_name("FakeQuantize"); + weightsParent = fakeQuantizeOnWeights; + } + + auto unsupportedOperation = std::make_shared( + input, weightsParent, ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, ngraph::CoordinateDiff{ 0, 0 }, ngraph::Strides{ 1, 1 }); + unsupportedOperation->set_friendly_name("UnsupportedOperation"); + + const auto result = std::make_shared(unsupportedOperation); + result->set_friendly_name("Result"); + + std::shared_ptr function = std::make_shared( + ResultVector{ result }, + ngraph::ParameterVector{ input }, + "FakeQuantizeOnWeightsWithUnsupportedOperations"); + + return function; +} + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_precision_selection_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_precision_selection_function.cpp index 53ffde8d33f482..47b8833f7715c1 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_precision_selection_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_precision_selection_function.cpp @@ -171,7 +171,7 @@ std::shared_ptr FakeQuantizePrecisionSelectionFunction::getRef } else { // TODO: potential workaround for the same case: // openvino\inference-engine\tests\ngraph_functions\src\low_precision_transformations\concat_function.cpp, line #496 - // branch1Pooling->set_output_type(0, values.fakeQuantizeOnDataOutPrecision, branch1Pooling->get_output_partial_shape(0)); + branch1Pooling->set_output_type(0, values.fakeQuantizeOnDataOutPrecision, branch1Pooling->get_output_partial_shape(0)); } } diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/get_dequantization_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/get_dequantization_function.cpp index 624a38c77f55f8..7fcf672b7d7757 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/get_dequantization_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/get_dequantization_function.cpp @@ -1,21 +1,93 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include 
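// Illustrative sketch only: driving the new FakeQuantizeOnWeightsAndUnsupportedChildFunction
// helper above from a test. The weight constant, input shape and FakeQuantize intervals are
// hypothetical, and the brace-initializer assumes the usual FakeQuantizeOnWeights constructor
// (level, constant shape, input/output intervals).
const auto unsupportedChildWeights = ngraph::opset1::Constant::create(
    ngraph::element::f32, ngraph::Shape{ 1, 1, 3, 3 }, std::vector<float>(9, 1.f));
const auto unsupportedChildFunction =
    ngraph::builder::subgraph::FakeQuantizeOnWeightsAndUnsupportedChildFunction::get(
        ngraph::Shape{ 1, 1, 16, 16 },
        ngraph::element::f32,
        unsupportedChildWeights,
        { 255ul, ngraph::Shape{ 1, 1, 1, 1 }, { -1.27f }, { 1.27f }, { -1.27f }, { 1.27f } });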
"lpt_ngraph_functions/get_dequantization_function.hpp" + #include #include #include #include -#include -#include "lpt_ngraph_functions/get_dequantization_function.hpp" -#include "ngraph_functions/subgraph_builders.hpp" #include +#include +#include "ngraph_functions/subgraph_builders.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" namespace ngraph { namespace builder { namespace subgraph { + +std::shared_ptr GetDequantizationFunction::get( + const ngraph::element::Type& precision, + const Shape& shape, + const FakeQuantizeOnData& fakeQuantize, + const ngraph::builder::subgraph::DequantizationOperations& dequantization) { + const std::shared_ptr input = std::make_shared( + ngraph::element::f32, + shape); + + std::shared_ptr parent = input; + if (!fakeQuantize.empty()) { + parent = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(parent, precision, fakeQuantize); + } + + if (!dequantization.empty()) { + parent = makeDequantization(parent, dequantization); + parent->set_friendly_name("output"); + } + + return std::make_shared( + ngraph::ResultVector{ std::make_shared(parent) }, + ngraph::ParameterVector{ as_type_ptr(input) }, + "DequantizationFunction"); +} + +std::shared_ptr GetDequantizationFunction::get( + const ngraph::element::Type& precision, + const Shape& shape, + const FakeQuantizeOnData& fakeQuantize, + const ngraph::pass::low_precision::FakeQuantizeDequantization& dequantization) { + const std::shared_ptr input = std::make_shared( + ngraph::element::f32, + shape); + + std::shared_ptr parent = input; + if (!fakeQuantize.empty()) { + parent = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed(parent, precision, fakeQuantize); + } + + if (dequantization.convert != nullptr) { + parent = dequantization.convert->clone_with_new_inputs({ parent }); + parent->set_friendly_name(dequantization.convert->get_friendly_name()); + } + + if (dequantization.subtract != nullptr) { + const auto parent2 = dequantization.subtractConvert == nullptr ? + std::dynamic_pointer_cast(dequantization.subtractConstant) : + dequantization.subtractConvert; + const auto index = ngraph::pass::low_precision::NetworkHelper::getChildInputIndex(parent2, dequantization.subtract); + parent = dequantization.subtract->clone_with_new_inputs(index == 1ul ? + OutputVector{ parent, parent2 } : + OutputVector{ parent2, parent }); + parent->set_friendly_name(dequantization.subtract->get_friendly_name()); + } + + if (dequantization.multiply != nullptr) { + const auto index = ngraph::pass::low_precision::NetworkHelper::getChildInputIndex(dequantization.multiplyConstant, dequantization.multiply); + parent = dequantization.multiply->clone_with_new_inputs(index == 1ul ? 
+ OutputVector{ parent, dequantization.multiplyConstant } : + OutputVector{ dequantization.multiplyConstant, parent }); + parent->set_friendly_name(dequantization.multiply->get_friendly_name()); + } + + return std::make_shared( + ngraph::ResultVector{ std::make_shared(parent) }, + ngraph::ParameterVector{ as_type_ptr(input) }, + "DequantizationFunction"); +} + std::shared_ptr GetDequantizationFunction::getOriginal( bool isConvert, bool isSubtract, size_t subDataInput, size_t mulDataInput) { const std::shared_ptr input = std::make_shared( diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp index 008534b08991f1..97084998177df7 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -30,9 +30,10 @@ std::shared_ptr createWeightsOriginal( const size_t groupCount, const size_t kernelSize, const std::vector& weightsValues, - const FakeQuantizeOnWeights& fakeQuantizeOnWeights) { + const FakeQuantizeOnWeights& fakeQuantizeOnWeights, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights) { std::shared_ptr weights; - if (fakeQuantizeOnWeights.empty()) { + if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { weights = ngraph::opset1::Constant::create( precision, ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 }, @@ -41,32 +42,36 @@ std::shared_ptr createWeightsOriginal( weightsValues); } else { const size_t inputChannelsPerGroup = inputChannelsCount / groupCount; - const std::shared_ptr weightsConst = ngraph::opset1::Constant::create( + weights = ngraph::opset1::Constant::create( precision, ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }, weightsValues.size() == 1ul ? 
std::vector(outputChannelsCount * kernelSize * kernelSize * inputChannelsPerGroup, weightsValues[0]) : weightsValues); - const std::shared_ptr fakeQuantize = ngraph::builder::makeFakeQuantize( - weightsConst, - precision, - fakeQuantizeOnWeights.quantizationLevel, - { outputChannelsCount, 1, 1, 1 }, - fakeQuantizeOnWeights.inputLowValues, - fakeQuantizeOnWeights.inputHighValues, - fakeQuantizeOnWeights.outputLowValues, - fakeQuantizeOnWeights.outputHighValues); - - const std::shared_ptr reshape = std::make_shared( - fakeQuantize, + if (!fakeQuantizeOnWeights.empty()) { + weights = ngraph::builder::makeFakeQuantize( + weights, + precision, + fakeQuantizeOnWeights.quantizationLevel, + { outputChannelsCount, 1, 1, 1 }, + fakeQuantizeOnWeights.inputLowValues, + fakeQuantizeOnWeights.inputHighValues, + fakeQuantizeOnWeights.outputLowValues, + fakeQuantizeOnWeights.outputHighValues); + } + + if (!dequantizationOnWeights.empty()) { + weights = ngraph::builder::subgraph::makeDequantization(weights, dequantizationOnWeights); + } + + weights = std::make_shared( + weights, ngraph::opset1::Constant::create( element::i64, Shape{ 5 }, std::vector({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, 7, 7 })), true); - - weights = reshape; } return weights; @@ -100,7 +105,8 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( groupCount, kernelSize, weightsConst->cast_vector(), - fakeQuantizeOnWeights); + fakeQuantizeOnWeights, + {}); const auto convolution = std::make_shared( dequantization, @@ -152,7 +158,8 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( groupCount, kernelSize, weightsValues, - fakeQuantizeOnWeights); + fakeQuantizeOnWeights, + {}); const auto convolution = std::make_shared( fakeQuantizeOnActivations == nullptr ? 
input : fakeQuantizeOnActivations, @@ -166,14 +173,15 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( return std::make_shared(results, ngraph::ParameterVector{ input }, "GroupConvolutionTransformation"); } -std::shared_ptr GroupConvolutionFunction::getReference( +std::shared_ptr GroupConvolutionFunction::get( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, - const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights, + const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, const ngraph::element::Type precisionAfterDequantization) { @@ -193,7 +201,7 @@ std::shared_ptr GroupConvolutionFunction::getReference( } std::shared_ptr weights; - if (fakeQuantizeOnWeights.empty()) { + if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { const ngraph::Shape weightsShape = ngraph::Shape{ groupCount, outputChannelsInGroup, inputChannelsInGroup, kernelSize, kernelSize }; weights = ngraph::opset1::Constant::create( weightsConst->get_element_type(), @@ -210,7 +218,8 @@ std::shared_ptr GroupConvolutionFunction::getReference( groupCount, kernelSize, weightsConst->cast_vector(), - fakeQuantizeOnWeights); + fakeQuantizeOnWeights, + dequantizationOnWeights); } auto convolutionOriginal = ngraph::opset1::GroupConvolution( diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/mat_mul_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/mat_mul_function.cpp index 5ac07fd2498f80..1f3f6cef6befa0 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/mat_mul_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/mat_mul_function.cpp @@ -292,10 +292,10 @@ std::shared_ptr MatMulFunction::getReference( std::shared_ptr MatMulFunction::getOriginal( const ngraph::element::Type precision, const ngraph::Shape& inputShape, - const FakeQuantizeOnData& fqOnData, + const FakeQuantizeOnDataWithConstant& fqOnData, const ngraph::Shape& weightsConstShape, const std::vector& weightsConstValues, - const FakeQuantizeOnWeights& fqOnWeights) { + const FakeQuantizeOnDataWithConstant& fqOnWeights) { const std::shared_ptr input = std::make_shared( precision, inputShape); @@ -314,7 +314,7 @@ std::shared_ptr MatMulFunction::getOriginal( lastDequantization, fakeQuantize, false, - false); + true); matMul->set_friendly_name("matMul"); std::shared_ptr result = std::make_shared(matMul); diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index 2b0b614b89f049..2ce54b684e936a 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -137,6 +137,16 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &biasesWeights = {}); 
+std::shared_ptr makeCTCGreedyDecoder( + const ngraph::Output& inputData, + const bool mergeRepeated); + +std::shared_ptr makeCTCGreedyDecoderSeqLen( + const ngraph::Output& inputData, + int blankIndex, + bool mergeRepeated, + const element::Type& idxPrec); + std::shared_ptr makeCTCLoss( const ngraph::Output& logitsNode, std::vector& logitsLength, @@ -236,6 +246,12 @@ std::shared_ptr makeMVN(const ngraph::Output &in, bool normalizeVariance, double eps); +std::shared_ptr makeMVN6(const Output& in, + const Output& axesNode, + bool normalizeVariance, + float eps, + std::string& epsMode); + std::shared_ptr makeSqueezeUnsqueeze(const ngraph::Output &in, const element::Type &type, const std::vector &squeeze_indices, diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder.cpp new file mode 100644 index 00000000000000..fd9ff69a17d022 --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeCTCGreedyDecoder( + const ngraph::Output& inputData, + const bool mergeRepeated) { + auto inputDataShape = inputData.get_shape(); + size_t T = inputDataShape[0]; + size_t B = inputDataShape[1]; + + std::mt19937 gen(1); + std::uniform_int_distribution dist(1, T); + + std::vector sequenceMaskData(B * T, 0); + for (int b = 0; b < B; b++) { + int len = dist(gen); + for (int t = 0; t < len; t++) { + sequenceMaskData[t * B + b] = 1; + } + } + + auto sequenceMaskNode = makeConstant(inputData.get_element_type(), {T, B}, sequenceMaskData); + + auto CTCGreedyDecoderNode = std::make_shared(inputData, sequenceMaskNode, mergeRepeated); + + return CTCGreedyDecoderNode; +} +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder_seq_len.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder_seq_len.cpp new file mode 100644 index 00000000000000..a0fe52c26f6ead --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/ctc_greedy_decoder_seq_len.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeCTCGreedyDecoderSeqLen( + const ngraph::Output& inputData, + int blankIndex, + bool mergeRepeated, + const element::Type& idxPrec) { + const auto& inputDataShape = inputData.get_shape(); + const size_t B = inputDataShape[0]; + const size_t T = inputDataShape[1]; + + std::mt19937 gen(1); + std::uniform_int_distribution dist(0, T); + + std::vector sequenceLenData(B); + for (int b = 0; b < B; b++) { + int len = dist(gen); + sequenceLenData[b] = len; + } + + auto sequenceLenNode = makeConstant(idxPrec, {B}, sequenceLenData); + + std::vector blankIdxData = {blankIndex}; + auto blankIndexNode = makeConstant(idxPrec, {1}, blankIdxData); + + return std::make_shared(inputData, sequenceLenNode, blankIndexNode, mergeRepeated, idxPrec, idxPrec); +} +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/mvn.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/mvn.cpp 
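// Illustrative sketch only: wiring the new CTC decoder builders above into a small test
// graph. The shapes, blank index and index precision are hypothetical. makeCTCGreedyDecoder
// generates its own sequence mask for a [T, N, C] input, while makeCTCGreedyDecoderSeqLen
// generates sequence lengths for an [N, T, C] input.
const auto logitsTNC = std::make_shared<ngraph::opset1::Parameter>(
    ngraph::element::f32, ngraph::Shape{ 20 /*T*/, 8 /*N*/, 128 /*C*/ });
const auto greedyDecoder = ngraph::builder::makeCTCGreedyDecoder(logitsTNC, true /*mergeRepeated*/);

const auto logitsNTC = std::make_shared<ngraph::opset1::Parameter>(
    ngraph::element::f32, ngraph::Shape{ 8 /*N*/, 20 /*T*/, 128 /*C*/ });
const auto greedyDecoderSeqLen = ngraph::builder::makeCTCGreedyDecoderSeqLen(
    logitsNTC, 0 /*blankIndex*/, true /*mergeRepeated*/, ngraph::element::i32);

const auto ctcFunction = std::make_shared<ngraph::Function>(
    ngraph::OutputVector{ greedyDecoder, greedyDecoderSeqLen->output(0), greedyDecoderSeqLen->output(1) },
    ngraph::ParameterVector{ logitsTNC, logitsNTC });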
index 6a26eb5c9a7f4d..f0a9017ea75b24 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/mvn.cpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/mvn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -24,5 +24,18 @@ std::shared_ptr makeMVN(const ngraph::Output &in, return mvnNode; } +std::shared_ptr makeMVN6(const Output& in, + const Output& axesNode, + bool normalizeVariance, + float eps, + std::string& epsMode) { + op::MVNEpsMode nEpsMode = op::MVNEpsMode::INSIDE_SQRT; + if (epsMode == "outside_sqrt") + nEpsMode = op::MVNEpsMode::OUTSIDE_SQRT; + auto mvnNode = std::make_shared(in, axesNode, normalizeVariance, eps, nEpsMode); + + return mvnNode; +} + } // namespace builder -} // namespace ngraph \ No newline at end of file +} // namespace ngraph diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp index a2e81527c39694..20125117abe4ff 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp @@ -34,7 +34,7 @@ class ExecutableNetworkThreadSafeAsyncOnlyTests : public ::testing::Test { virtual void SetUp() { mockExeNetwork = make_shared(); exeNetwork = details::shared_from_irelease( - new ExecutableNetworkBase(mockExeNetwork)); + new ExecutableNetworkBase(mockExeNetwork)); InputsDataMap networkInputs; OutputsDataMap networkOutputs; mockAsyncInferRequestInternal = make_shared(networkInputs, networkOutputs); @@ -46,7 +46,7 @@ TEST_F(ExecutableNetworkThreadSafeAsyncOnlyTests, createAsyncInferRequestCallsTh EXPECT_CALL(*mockExeNetwork.get(), CreateAsyncInferRequestImpl(_, _)).WillOnce( Return(mockAsyncInferRequestInternal)); EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - auto threadSafeReq = dynamic_pointer_cast>(req); + auto threadSafeReq = dynamic_pointer_cast(req); ASSERT_NE(threadSafeReq, nullptr); } @@ -109,7 +109,7 @@ class ExecutableNetworkThreadSafeTests : public ::testing::Test { virtual void SetUp() { mockExeNetwork = make_shared(); exeNetwork = details::shared_from_irelease( - new ExecutableNetworkBase(mockExeNetwork)); + new ExecutableNetworkBase(mockExeNetwork)); InputsDataMap networkInputs; OutputsDataMap networkOutputs; mockInferRequestInternal = make_shared(networkInputs, networkOutputs); @@ -120,7 +120,7 @@ TEST_F(ExecutableNetworkThreadSafeTests, createInferRequestCallsThreadSafeImplAn IInferRequest::Ptr req; EXPECT_CALL(*mockExeNetwork.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal)); EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - auto threadSafeReq = dynamic_pointer_cast>(req); + auto threadSafeReq = dynamic_pointer_cast(req); ASSERT_NE(threadSafeReq, nullptr); } @@ -128,7 +128,7 @@ TEST_F(ExecutableNetworkThreadSafeTests, returnErrorIfInferThrowsException) { IInferRequest::Ptr req; EXPECT_CALL(*mockExeNetwork.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal)); EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - EXPECT_CALL(*mockInferRequestInternal.get(), InferImpl()).WillOnce(Throw(std::runtime_error(""))); + EXPECT_CALL(*mockInferRequestInternal.get(), checkBlobs()).WillOnce(Throw(std::runtime_error(""))); EXPECT_NO_THROW(sts = 
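// Illustrative sketch only: calling the new makeMVN6 builder above. The shapes, reduction
// axes and eps value are hypothetical; any epsMode string other than "outside_sqrt"
// selects op::MVNEpsMode::INSIDE_SQRT.
const auto mvnInput = std::make_shared<ngraph::opset1::Parameter>(
    ngraph::element::f32, ngraph::Shape{ 1, 3, 32, 32 });
const auto mvnAxes = ngraph::opset1::Constant::create(
    ngraph::element::i64, ngraph::Shape{ 2 }, std::vector<int64_t>{ 2, 3 });
std::string epsMode = "outside_sqrt";  // needs an lvalue: makeMVN6 takes the mode by non-const reference
const auto mvn6 = ngraph::builder::makeMVN6(mvnInput, mvnAxes, true /*normalizeVariance*/, 1e-9f, epsMode);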
req->Infer(&dsc)); ASSERT_EQ(StatusCode::GENERAL_ERROR, sts) << dsc.msg; } diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp index c3789f8937b12e..cd116340e52bf3 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp @@ -34,7 +34,7 @@ class InferRequestBaseTests : public ::testing::Test { virtual void SetUp() { mock_impl.reset(new MockIAsyncInferRequestInternal()); - request = details::shared_from_irelease(new InferRequestBase(mock_impl)); + request = details::shared_from_irelease(new InferRequestBase(mock_impl)); } }; @@ -131,42 +131,41 @@ TEST_F(InferRequestBaseTests, canCatchUnknownErrorInInfer) { // GetPerformanceCounts TEST_F(InferRequestBaseTests, canForwardGetPerformanceCounts) { std::map info; - EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts(Ref(info))).Times(1); - ASSERT_EQ(OK, request->GetPerformanceCounts(info, &dsc)); + EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts()).WillOnce(Return(std::map{})); + ASSERT_EQ(OK, request->GetPerformanceCounts(info, &dsc)) << dsc.msg; } TEST_F(InferRequestBaseTests, canReportErrorInGetPerformanceCounts) { std::map info; - EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts(_)).WillOnce(Throw(std::runtime_error("compare"))); + EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts()).WillOnce(Throw(std::runtime_error("compare"))); ASSERT_NE(request->GetPerformanceCounts(info, &dsc), OK); ASSERT_STREQ(dsc.msg, "compare"); } TEST_F(InferRequestBaseTests, canCatchUnknownErrorInGetPerformanceCounts) { std::map info; - EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts(_)).WillOnce(Throw(5)); + EXPECT_CALL(*mock_impl.get(), GetPerformanceCounts()).WillOnce(Throw(5)); ASSERT_EQ(UNEXPECTED, request->GetPerformanceCounts(info, nullptr)); } // GetBlob TEST_F(InferRequestBaseTests, canForwardGetBlob) { Blob::Ptr data; - const char *name = ""; - EXPECT_CALL(*mock_impl.get(), GetBlob(name, Ref(data))).Times(1); - ASSERT_EQ(OK, request->GetBlob(name, data, &dsc)); + EXPECT_CALL(*mock_impl.get(), GetBlob(_)).WillOnce(Return(Blob::Ptr{})); + ASSERT_EQ(OK, request->GetBlob("", data, &dsc)) << dsc.msg; } TEST_F(InferRequestBaseTests, canReportErrorInGetBlob) { - EXPECT_CALL(*mock_impl.get(), GetBlob(_, _)).WillOnce(Throw(std::runtime_error("compare"))); + EXPECT_CALL(*mock_impl.get(), GetBlob(_)).WillOnce(Throw(std::runtime_error("compare"))); Blob::Ptr data; - ASSERT_NE(request->GetBlob(nullptr, data, &dsc), OK); + ASSERT_NE(request->GetBlob("", data, &dsc), OK); ASSERT_STREQ(dsc.msg, "compare"); } TEST_F(InferRequestBaseTests, canCatchUnknownErrorInGetBlob) { Blob::Ptr data; - EXPECT_CALL(*mock_impl.get(), GetBlob(_, _)).WillOnce(Throw(5)); - ASSERT_EQ(UNEXPECTED, request->GetBlob(nullptr, data, nullptr)); + EXPECT_CALL(*mock_impl.get(), GetBlob(_)).WillOnce(Throw(5)); + ASSERT_EQ(UNEXPECTED, request->GetBlob("notEmpty", data, nullptr)); } // SetBlob @@ -180,14 +179,14 @@ TEST_F(InferRequestBaseTests, canForwardSetBlob) { TEST_F(InferRequestBaseTests, canReportErrorInSetBlob) { EXPECT_CALL(*mock_impl.get(), SetBlob(_, _)).WillOnce(Throw(std::runtime_error("compare"))); Blob::Ptr data; - ASSERT_NE(request->SetBlob(nullptr, data, &dsc), OK); + ASSERT_NE(request->SetBlob("", data, &dsc), OK); ASSERT_STREQ(dsc.msg, "compare"); } 
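// A minimal sketch (not the actual InferRequestBase implementation) of the behaviour the
// updated expectations above rely on: the internal request now returns values and reports
// failures by throwing, and the public wrapper converts exceptions into a StatusCode plus
// a ResponseDesc message. The function and template names are assumptions for illustration;
// snprintf comes from <cstdio>.
template <typename InternalRequest>
InferenceEngine::StatusCode getBlobStatus(InternalRequest& internalRequest,
                                          const std::string& name,
                                          InferenceEngine::Blob::Ptr& data,
                                          InferenceEngine::ResponseDesc* resp) noexcept {
    try {
        data = internalRequest.GetBlob(name);                 // throws on error instead of returning a code
        return InferenceEngine::StatusCode::OK;
    } catch (const std::exception& ex) {
        if (resp != nullptr) {
            snprintf(resp->msg, sizeof(resp->msg), "%s", ex.what());
        }
        return InferenceEngine::StatusCode::GENERAL_ERROR;    // checked by canReportErrorInGetBlob
    } catch (...) {
        return InferenceEngine::StatusCode::UNEXPECTED;       // checked by canCatchUnknownErrorInGetBlob
    }
}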
TEST_F(InferRequestBaseTests, canCatchUnknownErrorInSetBlob) { Blob::Ptr data; EXPECT_CALL(*mock_impl.get(), SetBlob(_, _)).WillOnce(Throw(5)); - ASSERT_EQ(UNEXPECTED, request->SetBlob(nullptr, data, nullptr)); + ASSERT_EQ(UNEXPECTED, request->SetBlob("notEmpty", data, nullptr)); } // SetCompletionCallback @@ -244,7 +243,7 @@ class InferRequestTests : public ::testing::Test { mockNotEmptyNet.getOutputsInfo(outputsInfo); mockInferRequestInternal = make_shared(inputsInfo, outputsInfo); inferRequest = shared_from_irelease( - new InferRequestBase(mockInferRequestInternal)); + new InferRequestBase(mockInferRequestInternal)); return make_shared(inferRequest); } diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp index 6854e15177e4f6..8ac217e6cda3d6 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp @@ -21,18 +21,6 @@ using namespace std; using namespace InferenceEngine; using namespace InferenceEngine::details; -class TestAsyncInferRequestThreadSafeDefault : public AsyncInferRequestThreadSafeDefault { -public: - TestAsyncInferRequestThreadSafeDefault(const InferRequestInternal::Ptr& request, - const ITaskExecutor::Ptr& taskExecutor, - const ITaskExecutor::Ptr& callbackExecutor) - : AsyncInferRequestThreadSafeDefault(request, taskExecutor, callbackExecutor) {} - - void setRequestBusy() { - AsyncInferRequestThreadSafeDefault::setIsRequestBusy(true); - } -}; - struct DeferedExecutor : public ITaskExecutor { using Ptr = std::shared_ptr; DeferedExecutor() = default; @@ -61,7 +49,7 @@ struct DeferedExecutor : public ITaskExecutor { class InferRequestThreadSafeDefaultTests : public ::testing::Test { protected: - shared_ptr testRequest; + shared_ptr testRequest; ResponseDesc dsc; shared_ptr mockInferRequestInternal; @@ -76,7 +64,7 @@ class InferRequestThreadSafeDefaultTests : public ::testing::Test { OutputsDataMap outputsInfo; mockTaskExecutor = make_shared(); mockInferRequestInternal = make_shared(inputsInfo, outputsInfo); - testRequest = make_shared(mockInferRequestInternal, mockTaskExecutor, mockTaskExecutor); + testRequest = make_shared(mockInferRequestInternal, mockTaskExecutor, mockTaskExecutor); } bool _doesThrowExceptionWithMessage(std::function func, string refError) { @@ -93,7 +81,7 @@ class InferRequestThreadSafeDefaultTests : public ::testing::Test { // StartAsync TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnStartAsync) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->StartAsync(); }, REQUEST_BUSY_str)); @@ -101,19 +89,21 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnStartAsync) { } TEST_F(InferRequestThreadSafeDefaultTests, canResetBusyStatusIfStartAsyncFails) { - MockAsyncInferRequestDefault mockAsync(mockInferRequestInternal, mockTaskExecutor, mockTaskExecutor); - EXPECT_CALL(mockAsync, StartAsync_ThreadUnsafe()).Times(2) + auto 
taskExecutor = std::make_shared(); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + EXPECT_CALL(*mockInferRequestInternal, checkBlobs()).Times(2) .WillOnce(Throw(InferenceEngineException(__FILE__, __LINE__) << "compare")) .WillOnce(Return()); - ASSERT_TRUE(_doesThrowExceptionWithMessage([&]() { mockAsync.StartAsync(); }, "compare")); - ASSERT_NO_THROW(mockAsync.StartAsync()); + ASSERT_TRUE(_doesThrowExceptionWithMessage([&]() { testRequest->StartAsync(); }, "compare")); + ASSERT_NO_THROW(testRequest->StartAsync()); + taskExecutor->executeAll(); } // GetUserData TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetUserData) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->GetUserData(nullptr); }, REQUEST_BUSY_str)); @@ -123,7 +113,7 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetUserData) { // SetUserData TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnSetUserData) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->SetUserData(nullptr); }, REQUEST_BUSY_str)); @@ -132,7 +122,6 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnSetUserData) { // Wait TEST_F(InferRequestThreadSafeDefaultTests, returnInferNotStartedOnWait) { - testRequest->setRequestBusy(); int64_t ms = 0; StatusCode actual = testRequest->Wait(ms); ASSERT_EQ(INFER_NOT_STARTED, actual); @@ -141,7 +130,7 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnInferNotStartedOnWait) { // Infer TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnInfer) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->Infer(); }, REQUEST_BUSY_str)); @@ -149,24 +138,24 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnInfer) { } TEST_F(InferRequestThreadSafeDefaultTests, canResetBusyStatusIfInferFails) { - auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + auto taskExecutor = std::make_shared(); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(2) .WillOnce(Throw(InferenceEngineException(__FILE__, __LINE__) << "compare")) .WillOnce(Return()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->Infer(); }, "compare")); ASSERT_NO_THROW(testRequest->Infer()); + taskExecutor->executeAll(); } // GetPerformanceCounts TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetPerformanceCounts) { auto taskExecutor = 
std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { - std::map info; - testRequest->GetPerformanceCounts(info); + auto info = testRequest->GetPerformanceCounts(); }, REQUEST_BUSY_str)); taskExecutor->executeAll(); } @@ -174,12 +163,11 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetPerformanceCoun // GetBlob TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetBlob) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { - Blob::Ptr data; - testRequest->GetBlob(nullptr, data); + auto data = testRequest->GetBlob({}); }, REQUEST_BUSY_str)); taskExecutor->executeAll(); } @@ -187,17 +175,17 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnGetBlob) { // SetBlob TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnSetBlob) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); - ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->SetBlob(nullptr, nullptr); }, REQUEST_BUSY_str)); + ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->SetBlob({}, {}); }, REQUEST_BUSY_str)); taskExecutor->executeAll(); } // SetCompletionCallback TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnSetCompletionCallback) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); EXPECT_CALL(*mockInferRequestInternal, InferImpl()).Times(1).WillOnce(Return()); ASSERT_NO_THROW(testRequest->StartAsync()); ASSERT_TRUE(_doesThrowExceptionWithMessage([this]() { testRequest->SetCompletionCallback(nullptr); }, @@ -207,11 +195,10 @@ TEST_F(InferRequestThreadSafeDefaultTests, returnRequestBusyOnSetCompletionCallb TEST_F(InferRequestThreadSafeDefaultTests, callbackTakesOKIfAsyncRequestWasOK) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); IInferRequest::Ptr asyncRequest; - asyncRequest.reset(new InferRequestBase( - testRequest), [](IInferRequest *p) { p->Release(); }); + asyncRequest.reset(new InferRequestBase(testRequest), [](IInferRequest *p) { p->Release(); }); testRequest->SetPointerToPublicInterface(asyncRequest); testRequest->SetCompletionCallback([](InferenceEngine::IInferRequest::Ptr request, StatusCode status) { @@ -225,10 +212,9 @@ TEST_F(InferRequestThreadSafeDefaultTests, callbackTakesOKIfAsyncRequestWasOK) { TEST_F(InferRequestThreadSafeDefaultTests, callbackIsCalledIfAsyncRequestFailed) { auto taskExecutor = 
std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); IInferRequest::Ptr asyncRequest; - asyncRequest.reset(new InferRequestBase( - testRequest), [](IInferRequest *p) { p->Release(); }); + asyncRequest.reset(new InferRequestBase(testRequest), [](IInferRequest *p) { p->Release(); }); testRequest->SetPointerToPublicInterface(asyncRequest); bool wasCalled = false; @@ -248,10 +234,9 @@ TEST_F(InferRequestThreadSafeDefaultTests, callbackIsCalledIfAsyncRequestFailed) TEST_F(InferRequestThreadSafeDefaultTests, canCatchExceptionIfAsyncRequestFailedAndNoCallback) { auto taskExecutor = std::make_shared(); - testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); + testRequest = make_shared(mockInferRequestInternal, taskExecutor, taskExecutor); IInferRequest::Ptr asyncRequest; - asyncRequest.reset(new InferRequestBase( - testRequest), [](IInferRequest *p) { p->Release(); }); + asyncRequest.reset(new InferRequestBase(testRequest), [](IInferRequest *p) { p->Release(); }); testRequest->SetPointerToPublicInterface(asyncRequest); EXPECT_CALL(*mockInferRequestInternal.get(), InferImpl()).WillOnce(Throw(std::exception())); diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp index f7066904cb3583..d8cffe2e4de74a 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp @@ -20,7 +20,7 @@ using namespace InferenceEngine::details; template inline typename InferenceEngine::InferRequest make_infer_request(std::shared_ptr impl) { - typename InferRequestBase::Ptr req(new InferRequestBase(impl), [](IInferRequest* p) { + typename InferRequestBase::Ptr req(new InferRequestBase(impl), [](IInferRequest* p) { p->Release(); }); return InferenceEngine::InferRequest(req); diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index 4aa4fbf8fc3955..f85f4bef2092c4 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp @@ -80,7 +80,7 @@ TEST_F(InferenceEnginePluginInternalTest, failToSetBlobWithInCorrectName) { ASSERT_EQ(refError, dsc.msg); } -TEST_F(InferenceEnginePluginInternalTest, failToSetBlobWithNullPtr) { +TEST_F(InferenceEnginePluginInternalTest, failToSetBlobWithEmptyName) { Blob::Ptr inBlob = make_shared_blob({ Precision::FP32, {}, NCHW }); inBlob->allocate(); string inputName = "not_input"; @@ -88,7 +88,7 @@ TEST_F(InferenceEnginePluginInternalTest, failToSetBlobWithNullPtr) { IInferRequest::Ptr inferRequest; getInferRequestWithMockImplInside(inferRequest); - ASSERT_NO_THROW(sts = inferRequest->SetBlob(nullptr, inBlob, &dsc)); + ASSERT_NO_THROW(sts = inferRequest->SetBlob("", inBlob, &dsc)); ASSERT_EQ(StatusCode::GENERAL_ERROR, sts); dsc.msg[refError.length()] = '\0'; ASSERT_EQ(refError, dsc.msg); diff --git a/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp b/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp index 5df01a0b518f03..69520c1d4c54a1 100644 --- 
a/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp @@ -261,9 +261,12 @@ TEST_F(BlobTests, canMakeSharedBlob) { { InferenceEngine::Precision::FP32, size, InferenceEngine::CHW }); InferenceEngine::TBlob::Ptr blob3 = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, { 0 }, InferenceEngine::C }); + InferenceEngine::TBlob::Ptr blob4 = InferenceEngine::make_shared_blob( + { InferenceEngine::Precision::FP32, size, InferenceEngine::HWC }); ASSERT_EQ(blob1->size(), 0); ASSERT_EQ(blob2->size(), 1); ASSERT_EQ(blob3->size(), 0); + ASSERT_EQ(blob4->size(), 1); } TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) { diff --git a/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp b/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp index 1d42ba534f250b..21ea155807d468 100644 --- a/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp @@ -223,7 +223,7 @@ class ExecutableNetworkBaseTests : public ::testing::Test { virtual void SetUp() { mock_impl.reset(new MockIExecutableNetworkInternal()); - exeNetwork = shared_from_irelease(new ExecutableNetworkBase(mock_impl)); + exeNetwork = shared_from_irelease(new ExecutableNetworkBase(mock_impl)); } }; diff --git a/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_psroipooling_test.hpp b/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_psroipooling_test.hpp index c7bacd25d34835..5d80aa2674624a 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_psroipooling_test.hpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_psroipooling_test.hpp @@ -191,7 +191,7 @@ static std::vector s_PSROIPoolingNumROIs = { }; TEST_P(myriadLayersTestsPSROIPooling_smoke, PSROIPooling) { -#if defined(_WIN32) || defined(WIN32) +#ifdef _WIN32 SKIP() << "Disabled for Windows. 
Issue-13239"; #endif tensor_test_params dims_layer_in = std::get<0>(GetParam()); diff --git a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp index cd81b3e13b4c74..c4449099e7ecda 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp +++ b/inference-engine/tests_deprecated/functional/vpu/common/regression/helpers/vpu_case_common.hpp @@ -26,7 +26,7 @@ using namespace Regression::Matchers; }while(false) -#if defined(_WIN32) || defined(WIN32) +#ifdef _WIN32 # define DISABLE_ON_WINDOWS_IF(expr) DISABLE_IF((expr)) #else # define DISABLE_ON_WINDOWS_IF(expr) diff --git a/inference-engine/tests_deprecated/helpers/tests_file_utils.cpp b/inference-engine/tests_deprecated/helpers/tests_file_utils.cpp index 416234ec9d2575..e568bcfb6bdf0a 100644 --- a/inference-engine/tests_deprecated/helpers/tests_file_utils.cpp +++ b/inference-engine/tests_deprecated/helpers/tests_file_utils.cpp @@ -16,7 +16,7 @@ # include #endif -#if defined(WIN32) || defined(WIN64) +#ifdef _WIN32 // Copied from linux libc sys/stat.h: # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) diff --git a/inference-engine/tests_deprecated/helpers/tests_vpu_common.hpp b/inference-engine/tests_deprecated/helpers/tests_vpu_common.hpp index 0bdb05f6f7d8e1..8ccf7435b00075 100644 --- a/inference-engine/tests_deprecated/helpers/tests_vpu_common.hpp +++ b/inference-engine/tests_deprecated/helpers/tests_vpu_common.hpp @@ -28,7 +28,7 @@ static constexpr char ENV_HDDL_R[] = "IE_VPU_ENABLE_PER_LAYER_TESTS_HDDL"; } \ } -#if defined(_WIN32) || defined(WIN32) +#ifdef _WIN32 #define DISABLE_ON_WINDOWS_IF(expr) DISABLE_IF((expr)) #else #define DISABLE_ON_WINDOWS_IF(expr) diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp index fbe53442b94a85..3e0b509576e6e0 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp @@ -472,8 +472,7 @@ void GNAPropagateMatcher :: match() { } } - std::map perfMap; - plugin.GetPerformanceCounts(perfMap); + auto perfMap = plugin.GetPerformanceCounts(); if(_env.is_profiling_enabled != false) { ASSERT_NE(perfMap.empty(),true); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp index b6d4ba38efa2da..e5b88586e03bcd 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp @@ -339,32 +339,32 @@ TEST_P(MKLDNNCPUExtMVNTests, TestsMVN) {} INSTANTIATE_TEST_CASE_P( TestsMVN, MKLDNNCPUExtMVNTests, ::testing::Values( - /*0*/ mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, + /*0*/ mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, 
mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 1, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*9*/ mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 15, 15}, 1, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, + /*9*/ mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*14*/ mvn_test_params{{2,640, 15, 15}, 1, 1, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, + /*14*/ mvn_test_params{{2,640, 15, 15}, 1, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, // 5D - /*16*/ mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, + /*16*/ mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{2, 64, 24, 32, 40}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 0.00001f, 2, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{2, 64, 24, 32, 40}, 0, 1, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*23*/ mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 
0.00001f, 2, true, MKLDNNPlugin::impl_desc_type::unknown }, + mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, + /*23*/ mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, mvn_test_params{{1, 64, 32, 32, 32}, 0, 1, 0.001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown } )); @@ -616,29 +616,29 @@ INSTANTIATE_TEST_CASE_P( mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, + /*4*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, + /*7*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, + mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, 
false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, + /*13*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, + /*16*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 1, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, + mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, + mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, + mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp index a2178ab3e9c3d0..18a36d56e4c5a9 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp @@ -20,21 +20,17 @@ class MKLDNNTestExecNetwork: public MKLDNNPlugin::MKLDNNExecNetwork { } }; -class MKLDNNTestEngine: public MKLDNNPlugin::Engine { -public: - MKLDNNPlugin::MKLDNNGraph& getGraph(InferenceEngine::IExecutableNetwork::Ptr execNetwork) { - auto * execNetworkInt = - dynamic_cast *>(execNetwork.get()); - if (!execNetworkInt) - THROW_IE_EXCEPTION << "Cannot find loaded network!"; - - auto * network = reinterpret_cast(execNetworkInt->getImpl().get()); - if (!network) - THROW_IE_EXCEPTION << "Cannot get mkldnn graph!"; - return network->getGraph(); - } +struct TestExecutableNetworkBase : public InferenceEngine::ExecutableNetworkBase { + using InferenceEngine::ExecutableNetworkBase::_impl; + ~TestExecutableNetworkBase() override = default; }; +static MKLDNNPlugin::MKLDNNGraph& getGraph(InferenceEngine::IExecutableNetwork::Ptr execNetwork) { + return 
reinterpret_cast( + reinterpret_cast( + execNetwork.get())->_impl.get())->getGraph(); +} + class MKLDNNGraphLeaksTests: public ::testing::Test { protected: void addOutputToEachNode(InferenceEngine::CNNNetwork& network, std::vector& new_outputs, @@ -257,11 +253,11 @@ TEST_F(MKLDNNGraphLeaksTests, MKLDNN_not_release_outputs_fp32) { ASSERT_NE(1, network.getOutputsInfo().size()); - std::shared_ptr score_engine(new MKLDNNTestEngine()); + std::shared_ptr score_engine(new MKLDNNPlugin::Engine()); InferenceEngine::ExecutableNetwork exeNetwork1; ASSERT_NO_THROW(exeNetwork1 = score_engine->LoadNetwork(network, {})); - size_t modified_outputs_size = score_engine->getGraph(exeNetwork1).GetOutputNodes().size(); + size_t modified_outputs_size = getGraph(exeNetwork1).GetOutputNodes().size(); InferenceEngine::CNNNetwork network2; ASSERT_NO_THROW(network2 = core.ReadNetwork(model, weights_ptr)); @@ -270,10 +266,12 @@ TEST_F(MKLDNNGraphLeaksTests, MKLDNN_not_release_outputs_fp32) { InferenceEngine::ExecutableNetwork exeNetwork2; ASSERT_NO_THROW(exeNetwork2 = score_engine->LoadNetwork(network2, {})); - size_t original_outputs_size = score_engine->getGraph(exeNetwork2).GetOutputNodes().size(); + size_t original_outputs_size = getGraph(exeNetwork2).GetOutputNodes().size(); ASSERT_NE(modified_outputs_size, original_outputs_size); ASSERT_EQ(1, original_outputs_size); + } catch (std::exception& e) { + FAIL() << e.what(); } catch (...) { FAIL(); } diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp index ac415ac2fe577b..f2fe38b6eeba8d 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -230,27 +230,27 @@ INSTANTIATE_TEST_CASE_P( split_test_params { {1, 24, 2, 5}, {{1, 16, 2, 5}, {1, 8, 2, 5}}, - 1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {1, 20, 2, 5}, {{1, 13, 2, 5}, {1, 7, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {1, 20, 2, 5}, {{1, 10, 2, 5}, {1, 10, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {2, 20, 2, 5}, {{2, 10, 2, 5}, {2, 10, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {2, 20, 2, 5}, {{2, 15, 2, 5}, {2, 5, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {9, 11, 7, 5}, @@ -275,7 +275,7 @@ INSTANTIATE_TEST_CASE_P( split_test_params { {5, 6, 7, 15}, {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} + 1, 4, MKLDNNPlugin::impl_desc_type::ref, 
{MKLDNNPlugin::impl_desc_type::ref} }, split_test_params { {5, 6, 7, 15}, @@ -290,15 +290,15 @@ INSTANTIATE_TEST_CASE_P( split_test_params { {5, 6, 7, 15}, {{5, 6, 7, 15}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, + 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, split_test_params { {1, 32, 16, 16, 16}, {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}}, - 1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, + 1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, split_test_params { {1, 32, 16, 16, 16}, {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}}, - 1, 5, MKLDNNPlugin::impl_desc_type::unknown, {}})); + 1, 6, MKLDNNPlugin::impl_desc_type::unknown, {}})); class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests { protected: diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp index cdcb5a31873089..fd032cca535071 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp @@ -210,7 +210,7 @@ class MKLDNNGraphTestClass: public MKLDNNPlugin::MKLDNNGraph { PushInputData(input.first, input.second, batch); } - MKLDNNPlugin::MKLDNNGraph::Infer(batch); + MKLDNNPlugin::MKLDNNGraph::Infer(nullptr, batch); } catch (const std::exception &e) { FAIL() << e.what(); } diff --git a/inference-engine/thirdparty/CMakeLists.txt b/inference-engine/thirdparty/CMakeLists.txt index 041724c3cc3017..5586d900cd84ec 100644 --- a/inference-engine/thirdparty/CMakeLists.txt +++ b/inference-engine/thirdparty/CMakeLists.txt @@ -94,6 +94,6 @@ if(ENABLE_MKL_DNN) set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM.. 
endif() - add_subdirectory(mkl-dnn) + add_subdirectory(mkl-dnn EXCLUDE_FROM_ALL) add_library(mkldnn ALIAS dnnl) endif() diff --git a/inference-engine/thirdparty/clDNN/api/memory.hpp b/inference-engine/thirdparty/clDNN/api/memory.hpp index b54ed4cf5952cb..54e837cc3f5da1 100644 --- a/inference-engine/thirdparty/clDNN/api/memory.hpp +++ b/inference-engine/thirdparty/clDNN/api/memory.hpp @@ -65,7 +65,7 @@ struct shared_mem_params { shared_handle context; ///< OpenCL context for external operations shared_handle user_device; ///< DX/VA device for external operations shared_handle mem; ///< memory object handle -#ifdef WIN32 +#ifdef _WIN32 shared_handle surface; ///< VA/DXVA surface handle #else shared_surface surface; @@ -90,7 +90,7 @@ struct memory { static memory share_image(const engine& engine, const layout& layout, shared_handle img, uint32_t net_id = 0); /// Create shared memory object on @p engine over specified @p plane of video decoder surface @p surf using specified @p layout -#ifdef WIN32 +#ifdef _WIN32 static memory share_surface(const engine& engine, const layout& layout, shared_handle surf, uint32_t plane, uint32_t net_id = 0); static memory share_dx_buffer(const engine& engine, const layout& layout, shared_handle res, uint32_t net_id = 0); diff --git a/inference-engine/thirdparty/clDNN/api/scatter_elements_update.hpp b/inference-engine/thirdparty/clDNN/api/scatter_elements_update.hpp new file mode 100644 index 00000000000000..6fb11b6aca2ba8 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/scatter_elements_update.hpp @@ -0,0 +1,63 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "primitive.hpp" + +namespace cldnn { +/// @addtogroup cpp_api C++ API +/// @{ +/// @addtogroup cpp_topology Network Topology +/// @{ +/// @addtogroup cpp_primitives Primitives +/// @{ + +/// @brief +/// @details +struct scatter_elements_update : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(scatter_elements_update) + + enum scatter_elements_update_axis { + along_b, + along_f, + along_x, + along_y, + along_z, + along_w + }; + + /// @brief Constructs scatter_elements_update primitive. + /// @param id This primitive id. + /// @param dict Input data primitive id. + /// @param idx Input indexes primitive id. + /// @param idupd Input updates primitive id. + /// @param axis Gathering axis. 
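    /// A minimal usage sketch (assuming the cldnn::topology and cldnn::data primitives from
    /// this API, and pre-created memory objects data_mem, idx_mem and upd_mem; these names
    /// are placeholders only):
    /// @code
    ///   cldnn::topology topo;
    ///   topo.add(cldnn::data("data", data_mem));       // tensor to be updated
    ///   topo.add(cldnn::data("indices", idx_mem));     // per-element coordinates along 'axis'
    ///   topo.add(cldnn::data("updates", upd_mem));     // values written at those coordinates
    ///   topo.add(cldnn::scatter_elements_update("seu", "data", "indices", "updates",
    ///                                           cldnn::scatter_elements_update::along_f));
    /// @endcode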
+ scatter_elements_update(const primitive_id& id, + const primitive_id& data, + const primitive_id& idx, + const primitive_id& idupd, + const scatter_elements_update_axis axis, + const padding& output_padding = padding()) + : primitive_base(id, {data, idx, idupd}, output_padding), axis(axis) {} + + /// @brief ScatterElementsUpdate axis + scatter_elements_update_axis axis; +}; +/// @} +/// @} +/// @} +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h index 5eda7d2c99a5bd..4099586bfb38b5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h @@ -58,6 +58,7 @@ enum class KernelType { ONE_HOT, GATHER, SCATTER_UPDATE, + SCATTER_ELEMENTS_UPDATE, DEPTH_TO_SPACE, BATCH_TO_SPACE, SHUFFLE_CHANNELS, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.cpp new file mode 100644 index 00000000000000..bb5c96d67c16fe --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.cpp @@ -0,0 +1,174 @@ +/* +// Copyright (c) 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "scatter_elements_update_kernel_ref.h" +#include "kernel_selector_utils.h" +#include +#include + +namespace kernel_selector { +static size_t GetScatterElementsUpdateChannelIndex(const scatter_elements_update_params& params) { + Tensor::DataChannelName name = Tensor::DataChannelName::X; + + const size_t input_size = params.inputs[0].GetDims().size(); + switch (params.axis) { + case ScatterUpdateAxis::X: + return input_size - 1; + case ScatterUpdateAxis::Y: + return input_size - 2; + case ScatterUpdateAxis::Z: + return input_size - 3; + case ScatterUpdateAxis::W: + return 2; + case ScatterUpdateAxis::FEATURE: + return 1; + case ScatterUpdateAxis::BATCH: + return 0; + default: + break; + } + + return DataTensor::Channelndex(params.output.GetLayout(), name); +} + +ParamsKey ScatterElementsUpdateKernelRef::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::bfzyx); + k.EnableOutputLayout(DataLayout::bfzyx); + k.EnableInputLayout(DataLayout::bfwzyx); + k.EnableOutputLayout(DataLayout::bfwzyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDifferentTypes(); + return k; +} + +static inline std::string GetOrderString(std::vector& order) { + std::string order_str = order[0]; + for (size_t i = 1; i < order.size(); i++) + order_str += ", " + order[i]; + + return order_str; +} + +static inline std::vector GetDefaultOrder(size_t size) { + std::vector default_order; + if (size <= 4) { + default_order = {"b", "f", "y", "x"}; + } else if (size == 5) { + default_order = {"b", "f", "z", "y", "x"}; + } else if (size == 6) { + default_order = {"b", "f", "w", "z", "y", "x"}; + } + + return default_order; +} + +CommonDispatchData ScatterElementsUpdateKernelRef::SetDefault(const scatter_elements_update_params& params, const optional_params&, bool is_second) const { + CommonDispatchData dispatchData; + const auto& output = params.output; + const auto& indices = params.inputs[1]; + + const auto& scope = is_second ? 
indices : output; + + switch (params.inputs[0].GetLayout()) { + case DataLayout::bfyx: + dispatchData.gws = {scope.X().v, scope.Y().v, scope.Feature().v * scope.Batch().v}; + break; + + case DataLayout::bfzyx: + dispatchData.gws = {scope.X().v * scope.Y().v, scope.Z().v, scope.Feature().v * scope.Batch().v}; + break; + + case DataLayout::bfwzyx: + dispatchData.gws = {scope.X().v * scope.Y().v, scope.Z().v * scope.W().v, scope.Feature().v * scope.Batch().v}; + break; + default: + throw std::invalid_argument("Unsupported data layout for scatter elements update primitive"); + break; + } + + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + return dispatchData; +} + +JitConstants ScatterElementsUpdateKernelRef::GetJitConstants(const scatter_elements_update_params& params) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstant(MakeJitConstant("AXIS_VALUE", GetScatterElementsUpdateChannelIndex(params))); + + if (!params.fused_ops.empty()) { + FusedOpsConfiguration conf1 = { "_FIRST_KERNEL", GetDefaultOrder(params.output.GetDims().size()), "val", params.inputs[0].GetDType() }; + FusedOpsConfiguration conf2 = { "_SECOND_KERNEL", GetDefaultOrder(params.output.GetDims().size()), "val", params.inputs[0].GetDType() }; + jit.Merge(MakeFusedOpsJitConstants(params, {conf1, conf2})); + } + + return jit; +} + +bool ScatterElementsUpdateKernelRef::Validate(const Params& p, const optional_params& o) const { + if (p.GetType() != KernelType:: SCATTER_ELEMENTS_UPDATE || o.GetType() != KernelType::SCATTER_ELEMENTS_UPDATE) { + return false; + } + + const scatter_elements_update_params& params = static_cast(p); + + for (auto& fused_op : params.fused_ops) { + if (!IsFusedPrimitiveSupported(fused_op)) + return false; + } + + return true; +} + +KernelsData ScatterElementsUpdateKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { + if (!Validate(params, options)) { + return {}; + } + + KernelData kd = KernelData::Default(params, 2); + scatter_elements_update_params& newParams = *static_cast(kd.params.get()); + auto cldnn_jit = GetJitConstants(newParams); + + for (int i = 0; i < 2; i++) { + auto dispatchData = SetDefault(newParams, options, (i == 1)); + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); + + if (i == 1){ + cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true")); + } + std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); + + clKernelData& kernel = kd.kernels[i]; + + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3, GetFusedPrimitiveInputsCount(params)); + } + + return {kd}; +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.h new file mode 100644 index 00000000000000..027535f807af81 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_ref.h @@ -0,0 +1,58 @@ +/* +// Copyright (c) 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "kernel_base_opencl.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// scatter_elements_update_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct scatter_elements_update_params : public base_params { + scatter_elements_update_params() : base_params(KernelType::SCATTER_ELEMENTS_UPDATE), axis(ScatterUpdateAxis::BATCH) {} + + ScatterUpdateAxis axis; + + virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// scatter_elements_update_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct scatter_elements_update_optional_params : optional_params { + scatter_elements_update_optional_params() : optional_params(KernelType::SCATTER_ELEMENTS_UPDATE) {} +}; + +class ScatterElementsUpdateKernelRef : public KernelBaseOpenCL { +public: + ScatterElementsUpdateKernelRef() : KernelBaseOpenCL("scatter_elements_update_ref") {} + virtual ~ScatterElementsUpdateKernelRef() {} + virtual JitConstants GetJitConstants(const scatter_elements_update_params& params) const; + virtual CommonDispatchData SetDefault(const scatter_elements_update_params& params, const optional_params&, bool is_second) const; + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + std::vector GetSupportedFusedOps() const override { + return { FusedOpType::QUANTIZE, + FusedOpType::SCALE, + FusedOpType::ACTIVATION, + FusedOpType::ELTWISE }; + } + +protected: + bool Validate(const Params& p, const optional_params& o) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.cpp new file mode 100644 index 00000000000000..915b8110c3bad8 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.cpp @@ -0,0 +1,27 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "scatter_elements_update_kernel_selector.h" +#include "scatter_elements_update_kernel_ref.h" + +namespace kernel_selector { + +scatter_elements_update_kernel_selector::scatter_elements_update_kernel_selector() { Attach(); } + +KernelsData scatter_elements_update_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { + return GetNaiveBestKernel(params, options, KernelType::SCATTER_ELEMENTS_UPDATE); +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.h new file mode 100644 index 00000000000000..b4ebeb7e524e1a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_elements_update_kernel_selector.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { +class scatter_elements_update_kernel_selector : public kernel_selector_base { +public: + static scatter_elements_update_kernel_selector& Instance() { + static scatter_elements_update_kernel_selector instance_; + return instance_; + } + + scatter_elements_update_kernel_selector(); + + virtual ~scatter_elements_update_kernel_selector() {} + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp index d35e179c32c3c8..4c6e888e490d64 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.cpp @@ -67,22 +67,6 @@ ParamsKey ScatterUpdateKernelRef::GetSupportedKey() const { return k; } -static size_t GetNonEmptyDimsNumber(const DataTensor& data_tensor) { - if (data_tensor.LogicalSize() != 1) { - // Count the number of "one size" dimensions starting with X to Batch - size_t one_size_dims = 0; - for (auto& i : data_tensor.GetDims()) { - if (i.v == 1) - one_size_dims++; - else - break; - } - return data_tensor.Dimentions() - one_size_dims; - } else { - return 1; - } -} - static inline std::string GetOrderString(std::vector& order) { std::string order_str = order[0]; for (size_t i = 1; i < order.size(); i++) @@ -104,99 +88,90 @@ static inline std::vector GetDefaultOrder(size_t size) { return default_order; } -static std::string GetUpdatesIndexOrder(const scatter_update_params& params, size_t axis) { - std::vector default_order = 
GetDefaultOrder(params.output.GetDims().size()); - - for (unsigned int i = 0; i < params.inputs[2].GetDims().size() - params.output.GetDims().size(); i++) - default_order.push_back("0"); - - size_t indices_non_empty_dims = GetNonEmptyDimsNumber(params.inputs[1]); - std::string FYX_indices_size = "(INPUT1_FEATURE_NUM * INPUT1_SIZE_Y * INPUT1_SIZE_X)"; - std::string YX_indices_size = "(INPUT1_SIZE_Y * INPUT1_SIZE_X)"; - std::string X_indices_size = "(INPUT1_SIZE_X)"; - - // Shift indices of ScatterUpdate updates input related to Indices dims - for (size_t i = default_order.size() - 1; i > (axis + indices_non_empty_dims - 1); i--) - default_order[i] = default_order[i - indices_non_empty_dims + 1]; - - // Insert Indices indexes in axis dimention in the Update index order - for (size_t i = axis; i < (axis + indices_non_empty_dims) && i < default_order.size(); i++) { - switch(i - axis) { - case 0: - default_order[i] = "(OUTPUT_INDEX_ON_AXIS /" + FYX_indices_size + ")"; - break; - case 1: - default_order[i] = "((OUTPUT_INDEX_ON_AXIS %" + FYX_indices_size + ")/" + YX_indices_size + ")"; - break; - case 2: - default_order[i] = "(((OUTPUT_INDEX_ON_AXIS %" + FYX_indices_size + ")%" + YX_indices_size + ")/" + X_indices_size + ")"; - break; - case 3: - default_order[i] = "(((OUTPUT_INDEX_ON_AXIS %" + FYX_indices_size + ")%" + YX_indices_size + ")%" + X_indices_size + ")"; - break; - } +static inline std::string GetAxisName(size_t size, size_t axis) { + std::vector axis_names;; + if (size <= 4) { + axis_names = {"BATCH", "FEATURE", "Y", "X"}; + } else if (size == 5) { + axis_names = {"BATCH", "FEATURE", "Z", "Y", "X"}; + } else if (size == 6) { + axis_names = {"BATCH", "FEATURE", "W", "Z", "Y", "X"}; } + return axis_names[axis]; +} +static std::string GetUpdatesIndexOrder(const scatter_update_params& params) { + std::vector default_order = GetDefaultOrder(params.output.GetDims().size()); return GetOrderString(default_order); } CommonDispatchData ScatterUpdateKernelRef::SetDefault(const scatter_update_params& params, const optional_params&, bool is_second) const { CommonDispatchData dispatchData; const auto& output = params.output; - - const size_t indices_size = params.inputs[1].LogicalSize(); - - switch (params.inputs[0].GetLayout()) { - case DataLayout::bfyx: - dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; - if (is_second) { - if (params.axis == ScatterUpdateAxis::BATCH) - dispatchData.gws[2] = indices_size * output.Feature().v; - else if (params.axis == ScatterUpdateAxis::FEATURE) - dispatchData.gws[2] = indices_size * output.Batch().v; - else if (params.axis == ScatterUpdateAxis::Y) - dispatchData.gws[1] = indices_size; - else - dispatchData.gws[0] = indices_size; + if (!is_second) { + switch (output.GetLayout()) { + case DataLayout::bfyx: + dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * output.Batch().v}; + break; + case DataLayout::bfzyx: + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v}; + break; + case DataLayout::bfwzyx: + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; + break; + default: + throw std::runtime_error("Unsupported combination\n"); + break; } - break; - - case DataLayout::bfzyx: - dispatchData.gws = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v}; - if (is_second) { - if (params.axis == ScatterUpdateAxis::BATCH) - dispatchData.gws[2] = indices_size * 
output.Feature().v; - else if (params.axis == ScatterUpdateAxis::FEATURE) - dispatchData.gws[2] = indices_size * output.Batch().v; - else if (params.axis == ScatterUpdateAxis::Z) - dispatchData.gws[1] = indices_size; - else if (params.axis == ScatterUpdateAxis::Y) - dispatchData.gws[0] = indices_size * output.X().v; - else - dispatchData.gws[0] = indices_size * output.Y().v; - } - break; - - case DataLayout::bfwzyx: - dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; - if (is_second) { - if (params.axis == ScatterUpdateAxis::BATCH) - dispatchData.gws[2] = indices_size * output.Feature().v; - else if (params.axis == ScatterUpdateAxis::FEATURE) - dispatchData.gws[2] = indices_size * output.Batch().v; - else if (params.axis == ScatterUpdateAxis::Z) - dispatchData.gws[1] = indices_size * output.W().v; - else if (params.axis == ScatterUpdateAxis::W) - dispatchData.gws[1] = indices_size * output.Z().v; - else if (params.axis == ScatterUpdateAxis::Y) - dispatchData.gws[0] = indices_size * output.X().v; - else - dispatchData.gws[0] = indices_size * output.Y().v; + } else { + // second iteration + // Each work item is for each tensor in input2. + // Not using input2's shape info directly, because the input2's shape might be reordered from the reordering pass. + // Instead, we reconsider update2's dimension with input1's shape which is shrinked as 1d. + // e.g., axis = b, input0(10, 9, 10, 9, 10) && input1(4, 2) => input2(8, 9, 10, 9, 10 + const size_t indices_size = params.inputs[1].LogicalSize(); + switch (output.GetLayout()) { + case DataLayout::bfyx: + if (params.axis == ScatterUpdateAxis::BATCH) + dispatchData.gws = {output.X().v, output.Y().v, output.Feature().v * indices_size}; + else if (params.axis == ScatterUpdateAxis::FEATURE) + dispatchData.gws = {output.X().v, output.Y().v, indices_size * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::Y) + dispatchData.gws = {output.X().v, indices_size, output.Feature().v * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::X) + dispatchData.gws = {indices_size, output.Y().v, output.Feature().v * output.Batch().v}; + break; + case DataLayout::bfzyx: + if (params.axis == ScatterUpdateAxis::BATCH) + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v, output.Feature().v * indices_size}; + else if (params.axis == ScatterUpdateAxis::FEATURE) + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v, indices_size * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::Z) + dispatchData.gws = {output.X().v * output.Y().v, indices_size, output.Feature().v * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::Y) + dispatchData.gws = {output.X().v * indices_size, output.Z().v, output.Feature().v * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::X) + dispatchData.gws = {indices_size * output.Y().v, output.Z().v, output.Feature().v * output.Batch().v}; + break; + case DataLayout::bfwzyx: + if (params.axis == ScatterUpdateAxis::BATCH) + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, output.Feature().v * indices_size}; + else if (params.axis == ScatterUpdateAxis::FEATURE) + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * output.W().v, indices_size * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::W) + dispatchData.gws = {output.X().v * output.Y().v, output.Z().v * indices_size, output.Feature().v * output.Batch().v}; + else if (params.axis == 
ScatterUpdateAxis::Z) + dispatchData.gws = {output.X().v * output.Y().v, indices_size * output.W().v, output.Feature().v * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::Y) + dispatchData.gws = {output.X().v * indices_size, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; + else if (params.axis == ScatterUpdateAxis::X) + dispatchData.gws = {indices_size * output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; + break; + default: + throw std::runtime_error("Unsupported combination\n"); + break; } - break; - default: break; } - dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); return dispatchData; @@ -208,24 +183,51 @@ static std::string GetOutputIndexOnAxis(const scatter_update_params& params, siz } static std::vector GetVectorSecondOutputIndexOrder(const scatter_update_params& params, size_t axis) { - std::vector default_order = GetDefaultOrder(params.output.GetDims().size()); - default_order[axis] = "convert_int(indices[OUTPUT_INDEX_ON_AXIS])"; - return default_order; + auto output_order = GetDefaultOrder(params.output.GetDims().size()); + output_order[axis] = "convert_int(indices[OUTPUT_INDEX_ON_AXIS])"; + return output_order; } static std::string GetSecondIterOutputIndexOrder(const scatter_update_params& params, size_t axis) { - std::vector default_order = GetDefaultOrder(params.output.GetDims().size()); - default_order[axis] = "convert_int(indices[OUTPUT_INDEX_ON_AXIS])"; - return GetOrderString(default_order); + auto output_order = GetVectorSecondOutputIndexOrder(params, axis); + return GetOrderString(output_order); } JitConstants ScatterUpdateKernelRef::GetJitConstants(const scatter_update_params& params) const { + size_t axis_value = GetScatterUpdateChannelIndex(params); + JitConstants jit = MakeBaseParamsJitConstants(params); - jit.AddConstant(MakeJitConstant("UPDATES_INDEX_ORDER", GetUpdatesIndexOrder(params, GetScatterUpdateChannelIndex(params)))); + jit.AddConstant(MakeJitConstant("UPDATES_INDEX_ORDER", GetUpdatesIndexOrder(params))); jit.AddConstant(MakeJitConstant("SECOND_ITER_OUTPUT_INDEX_ORDER", GetSecondIterOutputIndexOrder(params, GetScatterUpdateChannelIndex(params)))); jit.AddConstant(MakeJitConstant("OUTPUT_INDEX_ON_AXIS", GetOutputIndexOnAxis(params, GetScatterUpdateChannelIndex(params)))); - jit.AddConstant(MakeJitConstant("AXIS_VALUE", GetScatterUpdateChannelIndex(params))); + jit.AddConstant(MakeJitConstant("AXIS_VALUE", axis_value)); + jit.AddConstant(MakeJitConstant("INDICES_SIZE", params.inputs[1].LogicalSize())); + + auto default_order = GetDefaultOrder(params.output.GetDims().size()); + size_t dims = default_order.size(); + std::string get_update_idx = "(INPUT2_OFFSET)"; + std::string output_size_feature = "OUTPUT_FEATURE_NUM"; + for (size_t i = 0; i < dims; ++i) { + if (i >= axis_value) { + std::string def_pitch = "UPDATES_" + GetAxisName(dims, i) + "_PITCH"; + std::string src_pitch = "(OUTPUT_" + GetAxisName(dims, i) + "_PITCH)"; + jit.AddConstant(MakeJitConstant(def_pitch, src_pitch)); + } else if (i == (axis_value - 1)) { + std::string def_pitch = "UPDATES_" + GetAxisName(dims, i) + "_PITCH"; + std::string src_pitch = "(OUTPUT_" + GetAxisName(dims, i + 1) + "_PITCH * INDICES_SIZE)"; + jit.AddConstant(MakeJitConstant(def_pitch, src_pitch)); + } else { // i < axis_value - 1 + std::string def_pitch = "UPDATES_" + GetAxisName(dims, i) + "_PITCH" + ""; + std::string output_size_name; + if (i == 0) output_size_name = "OUTPUT_FEATURE_NUM"; + else output_size_name 
= "OUTPUT_SIZE_" + GetAxisName(dims, i + 1); + std::string src_pitch = "(UPDATES_" + GetAxisName(dims, i + 1) + "_PITCH * " + output_size_name + ")"; + jit.AddConstant(MakeJitConstant(def_pitch, src_pitch)); + } + get_update_idx = get_update_idx + " + (" + default_order[i] + ")*(UPDATES_" + GetAxisName(dims, i) + "_PITCH)"; + } + jit.AddConstant(MakeJitConstant("GET_UPDATES_INDEX(idx_order)", get_update_idx)); if (!params.fused_ops.empty()) { FusedOpsConfiguration conf1 = { "_FIRST_KERNEL", GetDefaultOrder(params.output.GetDims().size()), "val", params.inputs[0].GetDType() }; @@ -248,6 +250,10 @@ bool ScatterUpdateKernelRef::Validate(const Params& p, const optional_params& o) return false; } + if (params.output.PitchesDifferFromLogicalDims() || params.inputs[2].PitchesDifferFromLogicalDims()) { + return false; + } + return true; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.h index 141c6dce280bca..25037d8550896d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scatter_update/scatter_update_kernel_ref.h @@ -47,7 +47,8 @@ class ScatterUpdateKernelRef : public KernelBaseOpenCL { KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; std::vector GetSupportedFusedOps() const override { - return { FusedOpType::QUANTIZE, + return { FusedOpType::ELTWISE, + FusedOpType::QUANTIZE, FusedOpType::SCALE, FusedOpType::ACTIVATION }; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/reshape_dims.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/reshape_dims.cl index 4d53bc3d2645c3..7b7ac6f4cbadfc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/reshape_dims.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/reshape_dims.cl @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2021 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -125,6 +125,32 @@ inline uint8 FUNC(reshape_6_to_4)(uint o, uint i, uint w, uint z, uint y, uint x return (uint8)(0, dst_b, dst_f, 0, 0, dst_y, dst_x, 0); } +inline uint8 FUNC(reshape_6_to_5)(uint o, uint i, uint w, uint z, uint y, uint x, + uint src_size_f, uint src_size_w, uint src_size_z, uint src_size_y, uint src_size_x, + uint dst_size_f, uint dst_size_z, uint dst_size_y, uint dst_size_x) +{ + const uint src_pitch_x = 1; + const uint src_pitch_y = src_pitch_x * src_size_x; + const uint src_pitch_z = src_pitch_y * src_size_y; + const uint src_pitch_w = src_pitch_z * src_size_z; + const uint src_pitch_f = src_pitch_w * src_size_w; + const uint src_pitch_b = src_pitch_f * src_size_f; + + uint flat_idx = x * src_pitch_x + y * src_pitch_y + z * src_pitch_z + w * src_pitch_w + i * src_pitch_f + o * src_pitch_b; + + uint dst_x = flat_idx % dst_size_x; + flat_idx /= dst_size_x; + uint dst_y = flat_idx % dst_size_y; + flat_idx /= dst_size_y; + uint dst_z = flat_idx % dst_size_z; + flat_idx /= dst_size_z; + uint dst_f = flat_idx % dst_size_f; + flat_idx /= dst_size_f; + uint dst_b = flat_idx; + return (uint8)(0, dst_b, dst_f, 0, dst_z, dst_y, dst_x, 0); +} + + inline uint8 FUNC(reshape_grouped)(uint g, uint o, uint i, uint z, uint y, uint x, uint src_size_ofm, uint dst_size_ofm) { const uint flat_ofm = g * src_size_ofm + o; @@ -167,6 +193,10 @@ inline uint8 FUNC(reshape_dims)( { return FUNC_CALL(reshape_5_to_4)(o, i, z, y, x, src_size_f, src_size_z, src_size_y, src_size_x, dst_size_f, dst_size_y, dst_size_x); } + else if (src_dims == 6 && dst_dims == 5) + { + return FUNC_CALL(reshape_6_to_5)(o, i, w, z, y, x, src_size_f, src_size_w, src_size_z, src_size_y, src_size_x, dst_size_f, dst_size_z, dst_size_y, dst_size_x); + } return (uint8)(0, o, i, w, z, y, x, 0); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_elements_update_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_elements_update_ref.cl new file mode 100644 index 00000000000000..f019f76d48312a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_elements_update_ref.cl @@ -0,0 +1,155 @@ +// Copyright (c) 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "include/include_all.cl" + +#define GET_UPDATES_INDEX(prefix, idx_order) CAT(prefix, _GET_INDEX)(idx_order) +#define GET_OUTPUT_INDEX(idx_order) OUTPUT_GET_INDEX(idx_order) +#if OUTPUT_DIMS == 4 + #define ORDER b,f,y,x + #define IDX_ORDER idx_b,idx_f,idx_y,idx_x +#elif OUTPUT_DIMS == 5 + #define ORDER b,f,z,y,x + #define IDX_ORDER idx_b,idx_f,idx_z,idx_y,idx_x +#elif OUTPUT_DIMS == 6 + #define ORDER b,f,w,z,y,x + #define IDX_ORDER idx_b,idx_f,idx_w,idx_z,idx_y,idx_x +#endif + +#if OUTPUT_DIMS != INPUT2_DIMS + #error "OUTPUT_DIMS is supposed to be same as INPUT2_DIMS" +#endif + +KERNEL(scatter_elements_update_ref)(const __global INPUT0_TYPE* data, + const __global INPUT1_TYPE* indices, + const __global INPUT2_TYPE* updates, + __global OUTPUT_TYPE* output +#if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS +#endif +) +{ + + const uint dim0 = get_global_id(0); + const uint dim1 = get_global_id(1); + const uint dim2 = get_global_id(2); + +#ifndef IS_SECOND_ITER // First kernel + #if OUTPUT_DIMS == 4 + const uint x = dim0; + const uint y = dim1; + const uint f = dim2 % OUTPUT_FEATURE_NUM; + const uint b = dim2 / OUTPUT_FEATURE_NUM; + #elif OUTPUT_DIMS == 5 + const uint x = dim0 % OUTPUT_SIZE_X; + const uint y = dim0 / OUTPUT_SIZE_X; + const uint z = dim1; + const uint f = dim2 % OUTPUT_FEATURE_NUM; + const uint b = dim2 / OUTPUT_FEATURE_NUM; + #elif OUTPUT_DIMS == 6 + const uint x = dim0 % OUTPUT_SIZE_X; + const uint y = dim0 / OUTPUT_SIZE_X; + const uint z = dim1 % OUTPUT_SIZE_Z; + const uint w = dim1 / OUTPUT_SIZE_Z; + const uint f = dim2 % OUTPUT_FEATURE_NUM; + const uint b = dim2 / OUTPUT_FEATURE_NUM; + #endif + + const uint output_idx = GET_OUTPUT_INDEX(ORDER); + INPUT0_TYPE val = data[output_idx]; + #if HAS_FUSED_OPS + FUSED_OPS_FIRST_KERNEL; + output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_FIRST_KERNEL); + #else + output[output_idx] = ACTIVATION(val, ACTIVATION_PARAMS); + #endif + +#else // Second kernel + #if OUTPUT_DIMS == 4 + const uint idx_x = dim0; + const uint idx_y = dim1; + const uint idx_f = dim2 % INPUT2_FEATURE_NUM; + const uint idx_b = dim2 / INPUT2_FEATURE_NUM; + #elif OUTPUT_DIMS == 5 + const uint idx_x = dim0 % INPUT2_SIZE_X; + const uint idx_y = dim0 / INPUT2_SIZE_X; + const uint idx_z = dim1; + const uint idx_f = dim2 % INPUT2_FEATURE_NUM; + const uint idx_b = dim2 / INPUT2_FEATURE_NUM; + #elif OUTPUT_DIMS == 6 + const uint idx_x = dim0 % INPUT2_SIZE_X; + const uint idx_y = dim0 / INPUT2_SIZE_X; + const uint idx_z = dim1 % INPUT2_SIZE_Z; + const uint idx_w = dim1 / INPUT2_SIZE_Z; + const uint idx_f = dim2 % INPUT2_FEATURE_NUM; + const uint idx_b = dim2 / INPUT2_FEATURE_NUM; + #endif + + const uint updates_idx = GET_UPDATES_INDEX(INPUT2, IDX_ORDER); + INPUT1_TYPE index = indices[(int)updates_idx]; + + #if OUTPUT_DIMS == 4 + #if AXIS_VALUE == 0 + const uint x = idx_x; const uint y = idx_y; const uint f = idx_f; const uint b = index; + #elif AXIS_VALUE == 1 + const uint x = idx_x; const uint y = idx_y; const uint f = index; const uint b = idx_b; + #elif AXIS_VALUE == 2 + const uint x = idx_x; const uint y = index; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 3 + const uint x = index; const uint y = idx_y; const uint f = idx_f; const uint b = idx_b; + #endif // AXIS_VALUE + #elif OUTPUT_DIMS == 5 + #if AXIS_VALUE == 0 + const uint x = idx_x; const uint y = idx_y; const uint z = idx_z; const uint f = idx_f; const uint b = index; + #elif AXIS_VALUE == 1 + const uint x = idx_x; const uint y = idx_y; const uint z = idx_z; const uint f = index; 
const uint b = idx_b; + #elif AXIS_VALUE == 2 + const uint x = idx_x; const uint y = idx_y; const uint z = index; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 3 + const uint x = idx_x; const uint y = index; const uint z = idx_z; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 4 + const uint x = index; const uint y = idx_y; const uint z = idx_z; const uint f = idx_f; const uint b = idx_b; + #endif // AXIS_VALUE + #elif OUTPUT_DIMS == 6 + #if AXIS_VALUE == 0 + const uint x = idx_x; const uint y = idx_y; const uint z = idx_z; const uint w = idx_w; const uint f = idx_f; const uint b = index; + #elif AXIS_VALUE == 1 + const uint x = idx_x; const uint y = idx_y; const uint z = idx_z; const uint w = idx_w; const uint f = index; const uint b = idx_b; + #elif AXIS_VALUE == 2 + const uint x = idx_x; const uint y = idx_y; const uint z = idx_z; const uint w = index; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 3 + const uint x = idx_x; const uint y = idx_y; const uint z = index; const uint w = idx_w; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 4 + const uint x = idx_x; const uint y = index; const uint z = idx_z; const uint w = idx_w; const uint f = idx_f; const uint b = idx_b; + #elif AXIS_VALUE == 5 + const uint x = index; const uint y = idx_y; const uint z = idx_z; const uint w = idx_w; const uint f = idx_f; const uint b = idx_b; + #endif // AXIS_VALUE + #endif + const uint output_idx = GET_OUTPUT_INDEX(ORDER); + + INPUT2_TYPE val = updates[(int)updates_idx]; + #if HAS_FUSED_OPS + FUSED_OPS_SECOND_KERNEL; + output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_SECOND_KERNEL); + #else + output[output_idx] = ACTIVATION(val, ACTIVATION_PARAMS); + #endif +#endif +} + +#undef GET_UPDATES_INDEX +#undef GET_OUTPUT_INDEX +#undef IDX_ORDER +#undef ORDER diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_update_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_update_ref.cl index 298b7030d47470..4234c00223f833 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_update_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/scatter_update_ref.cl @@ -15,8 +15,15 @@ #include "include/include_all.cl" -#define GET_UPDATES_INDEX(prefix, idx_order) CAT(prefix, _GET_INDEX)(idx_order) +#define AXIS_B (0) +#define AXIS_F (1) +#define AXIS_W (2) +#define AXIS_Z (OUTPUT_DIMS - 3) +#define AXIS_Y (OUTPUT_DIMS - 2) +#define AXIS_X (OUTPUT_DIMS - 1) + #define GET_OUTPUT_INDEX(idx_order) OUTPUT_GET_INDEX(idx_order) + #if OUTPUT_DIMS == 4 #define ORDER b,f,y,x #elif OUTPUT_DIMS == 5 @@ -37,7 +44,6 @@ KERNEL(scatter_update_ref)(const __global INPUT0_TYPE* dictionary, const uint dim0 = get_global_id(0); const uint dim1 = get_global_id(1); const uint dim2 = get_global_id(2); - #ifndef IS_SECOND_ITER // First kernel #if OUTPUT_DIMS == 4 const uint x = dim0; @@ -58,8 +64,9 @@ KERNEL(scatter_update_ref)(const __global INPUT0_TYPE* dictionary, const uint f = dim2 % OUTPUT_FEATURE_NUM; const uint b = dim2 / OUTPUT_FEATURE_NUM; #endif - + const uint output_idx = GET_OUTPUT_INDEX(ORDER); + INPUT0_TYPE val = dictionary[output_idx]; #if HAS_FUSED_OPS FUSED_OPS_FIRST_KERNEL; @@ -69,70 +76,64 @@ KERNEL(scatter_update_ref)(const __global INPUT0_TYPE* dictionary, #endif #else // Second kernel - #if OUTPUT_DIMS == 4 - const uint x = dim0; - const uint y = dim1; - #if AXIS_VALUE == 0 - const uint f = dim2 % OUTPUT_FEATURE_NUM; + #if 
(OUTPUT_DIMS == 4) + // bf|y|x + #if (AXIS_VALUE == AXIS_F) + const uint b = dim2 / INDICES_SIZE; + const uint f = dim2 % INDICES_SIZE; + #else const uint b = dim2 / OUTPUT_FEATURE_NUM; + const uint f = dim2 % OUTPUT_FEATURE_NUM; + #endif + const uint y = dim1; + const uint x = dim0; + #elif (OUTPUT_DIMS == 5) + // bf|z|yx + #if (AXIS_VALUE == AXIS_F) + const uint b = dim2 / INDICES_SIZE; + const uint f = dim2 % INDICES_SIZE; #else - const uint f = dim2 / OUTPUT_BATCH_NUM; - const uint b = dim2 % OUTPUT_BATCH_NUM; + const uint b = dim2 / OUTPUT_FEATURE_NUM; + const uint f = dim2 % OUTPUT_FEATURE_NUM; #endif - #elif OUTPUT_DIMS == 5 const uint z = dim1; - #if AXIS_VALUE == 1 - const uint f = dim2 / OUTPUT_BATCH_NUM; - const uint b = dim2 % OUTPUT_BATCH_NUM; - const uint x = dim0 % OUTPUT_SIZE_X; - const uint y = dim0 / OUTPUT_SIZE_X; - #elif AXIS_VALUE == 4 - const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; - const uint x = dim0 / OUTPUT_SIZE_Y; - const uint y = dim0 % OUTPUT_SIZE_Y; + #if (AXIS_VALUE == AXIS_X) + const uint y = dim0 / INDICES_SIZE; + const uint x = dim0 % INDICES_SIZE; #else - const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; - const uint x = dim0 % OUTPUT_SIZE_X; const uint y = dim0 / OUTPUT_SIZE_X; - #endif - #elif OUTPUT_DIMS == 6 - #if AXIS_VALUE == 1 - const uint f = dim2 / OUTPUT_BATCH_NUM; - const uint b = dim2 % OUTPUT_BATCH_NUM; const uint x = dim0 % OUTPUT_SIZE_X; - const uint y = dim0 / OUTPUT_SIZE_X; - const uint z = dim1 % OUTPUT_SIZE_Z; - const uint w = dim1 / OUTPUT_SIZE_Z; - #elif AXIS_VALUE == 3 - const uint f = dim2 % OUTPUT_FEATURE_NUM; + #endif + #elif (OUTPUT_DIMS == 6) + // bf|wz|yx + #if (AXIS_VALUE == AXIS_F) + const uint b = dim2 / INDICES_SIZE; + const uint f = dim2 % INDICES_SIZE; + #else const uint b = dim2 / OUTPUT_FEATURE_NUM; - const uint x = dim0 % OUTPUT_SIZE_X; - const uint y = dim0 / OUTPUT_SIZE_X; - const uint z = dim1 / OUTPUT_SIZE_W; - const uint w = dim1 % OUTPUT_SIZE_W; - #elif AXIS_VALUE == 5 const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; - const uint x = dim0 / OUTPUT_SIZE_Y; - const uint y = dim0 % OUTPUT_SIZE_Y; - const uint z = dim1 % OUTPUT_SIZE_Z; + #endif + #if (AXIS_VALUE == AXIS_Z) + const uint w = dim1 / INDICES_SIZE; + const uint z = dim1 % INDICES_SIZE; + #else const uint w = dim1 / OUTPUT_SIZE_Z; + const uint z = dim1 % OUTPUT_SIZE_Z; + #endif + #if (AXIS_VALUE == AXIS_X) + const uint y = dim0 / INDICES_SIZE; + const uint x = dim0 % INDICES_SIZE; #else - const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; - const uint x = dim0 % OUTPUT_SIZE_X; const uint y = dim0 / OUTPUT_SIZE_X; - const uint z = dim1 % OUTPUT_SIZE_Z; - const uint w = dim1 / OUTPUT_SIZE_Z; + const uint x = dim0 % OUTPUT_SIZE_X; #endif #endif const uint output_idx = GET_OUTPUT_INDEX(SECOND_ITER_OUTPUT_INDEX_ORDER); - const uint updates_idx = GET_UPDATES_INDEX(INPUT2, UPDATES_INDEX_ORDER); + const uint updates_idx = GET_UPDATES_INDEX(UPDATES_INDEX_ORDER); INPUT2_TYPE val = updates[updates_idx]; + #if HAS_FUSED_OPS FUSED_OPS_SECOND_KERNEL; output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_SECOND_KERNEL); @@ -142,5 +143,10 @@ KERNEL(scatter_update_ref)(const __global INPUT0_TYPE* dictionary, #endif } -#undef GET_UPDATES_INDEX #undef GET_OUTPUT_INDEX +#undef AXIS_B +#undef AXIS_F +#undef AXIS_W +#undef AXIS_Z +#undef AXIS_Y +#undef AXIS_X diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index 50169e9bdbe22d..8059089e83bfd5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -1515,36 +1515,36 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati in_vars_converted.push_back(in_name); } - switch (desc.GetType()) { - case KernelType::SCALE: { - auto get_acc_t = [&]() -> Datatype { - std::vector tensor_types = {desc.output_tensor.GetDType()}; - for (auto& in : desc.tensors) { - tensor_types.push_back(in.GetDType()); - } + auto get_acc_t = [&]() -> Datatype { + std::vector tensor_types = {desc.output_tensor.GetDType()}; + for (auto& in : desc.tensors) { + tensor_types.push_back(in.GetDType()); + } - std::vector types_prioritized = { Datatype::F32, Datatype::F16 }; + std::vector types_prioritized = { Datatype::F32, Datatype::F16 }; - for (auto& type : types_prioritized) { - if (std::any_of(tensor_types.begin(), tensor_types.end(), [=](const Datatype& t) -> bool { return t == type; })) { - return type; - } - } + for (auto& type : types_prioritized) { + if (std::any_of(tensor_types.begin(), tensor_types.end(), [=](const Datatype& t) -> bool { return t == type; })) { + return type; + } + } - return Datatype::F32; - }; + return Datatype::F32; + }; - auto get_input = [&](size_t index) -> std::string { - auto in_name = index == 0 ? in_var : GetInputVarName(index - 1, is_shuffled, shuffle_var); - auto tensor_type = index == 0 ? in_type : desc.tensors[index - 1].GetDType(); - auto acc_t = get_acc_t(); + auto get_input = [&](size_t index) -> std::string { + auto in_name = index == 0 ? in_var : GetInputVarName(index - 1, is_shuffled, shuffle_var); + auto tensor_type = index == 0 ? 
in_type : desc.tensors[index - 1].GetDType(); + auto acc_t = get_acc_t(); - if (tensor_type != acc_t) - return ConvertToType(in_name, acc_t, vec_size); - else - return in_name; - }; + if (tensor_type != acc_t) + return ConvertToType(in_name, acc_t, vec_size); + else + return in_name; + }; + switch (desc.GetType()) { + case KernelType::SCALE: { auto tmp_var = out_var + "_tmp"; if (desc.tensors.size() > 1) { op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " @@ -1574,8 +1574,9 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati throw std::runtime_error("[clDNN] Eltwise mode is not supported in fused ops codegen"); } - op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_vars_converted[0] + - op + ConvertToOutputType(in_var, vec_size) + ";"; + auto tmp_var = out_var + "_tmp"; + op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " + get_input(0) + op + get_input(1) + ";"; + op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(tmp_var, vec_size) + ";"; break; } case KernelType::QUANTIZE: { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp index 347ab507d4ad50..1d9686847fbeb6 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp @@ -72,7 +72,7 @@ void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) { shared_mem_params gpu_buffer::get_internal_params() const { return {shared_mem_type::shared_mem_buffer, static_cast(_context->context().get()), nullptr, static_cast(_buffer.get()), -#ifdef WIN32 +#ifdef _WIN32 nullptr, #else 0, @@ -185,7 +185,7 @@ void gpu_image2d::fill(unsigned char pattern, event_impl::ptr ev) { shared_mem_params gpu_image2d::get_internal_params() const { return {shared_mem_type::shared_mem_image, static_cast(_context->context().get()), nullptr, static_cast(_buffer.get()), -#ifdef WIN32 +#ifdef _WIN32 nullptr, #else 0, @@ -211,7 +211,7 @@ shared_mem_params gpu_media_buffer::get_internal_params() const { static_cast(_buffer.get()), surface, plane }; } -#ifdef WIN32 +#ifdef _WIN32 gpu_dx_buffer::gpu_dx_buffer(const refcounted_obj_ptr& engine, const layout& new_layout, const shared_mem_params* params, @@ -309,7 +309,7 @@ shared_mem_params gpu_usm::get_internal_params() const { static_cast(_engine->get_context()->context().get()), // context handle nullptr, // user_device handle nullptr, // mem handle -#ifdef WIN32 +#ifdef _WIN32 nullptr, // surface handle #else 0, // surface handle diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h index e31eff25ccf65f..01fbcb2829134b 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h @@ -139,7 +139,7 @@ struct gpu_media_buffer : public gpu_image2d { shared_mem_params get_internal_params() const override; private: void* device; -#ifdef WIN32 +#ifdef _WIN32 void* surface; #else uint32_t surface; @@ -147,7 +147,7 @@ struct gpu_media_buffer : public gpu_image2d { uint32_t plane; }; -#ifdef WIN32 +#ifdef _WIN32 struct gpu_dx_buffer : public gpu_buffer { friend cldnn::memory_pool; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp index bb5227c3980326..2e590d044c19a1 100644 --- 
a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp @@ -139,7 +139,7 @@ std::vector ocl_builder::build_device_list_from_user_device(b continue; std::vector devices; -#ifdef WIN32 +#ifdef _WIN32 platform.getDevices(CL_D3D11_DEVICE_KHR, user_device, CL_PREFERRED_DEVICES_FOR_D3D11_KHR, @@ -153,11 +153,11 @@ std::vector ocl_builder::build_device_list_from_user_device(b for (auto& device : devices) { if (!does_device_match_config(out_out_order, device)) continue; cl_context_properties props[] = { - #ifdef WIN32 +#ifdef _WIN32 CL_CONTEXT_D3D11_DEVICE_KHR, - #else +#else CL_CONTEXT_VA_API_DISPLAY_INTEL, - #endif +#endif (intptr_t)user_device, CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, CL_CONTEXT_PLATFORM, (cl_context_properties)id, diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp index 003b1b8838c734..76eacfbe89dd37 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp @@ -45,7 +45,7 @@ cl::PFN_clEnqueueAcquireMediaSurfacesINTEL cl::SharedSurfLock::pfn_acquire = NULL; cl::PFN_clEnqueueReleaseMediaSurfacesINTEL cl::SharedSurfLock::pfn_release = NULL; cl::PFN_clCreateFromMediaSurfaceINTEL cl::ImageVA::pfn_clCreateFromMediaSurfaceINTEL = NULL; -#ifdef WIN32 +#ifdef _WIN32 cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL; #endif diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp index 1876757df01aa5..fab9ed83113a9d 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp @@ -69,6 +69,7 @@ void register_implementations_gpu() { REGISTER_GPU(roi_pooling); REGISTER_GPU(scale); REGISTER_GPU(scatter_update); + REGISTER_GPU(scatter_elements_update); REGISTER_GPU(select); REGISTER_GPU(shuffle_channels); REGISTER_GPU(softmax); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp index 2b609e8a9defb3..37795e88812b73 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp @@ -61,6 +61,7 @@ #include "api/roi_pooling.hpp" #include "api/scale.hpp" #include "api/scatter_update.hpp" +#include "api/scatter_elements_update.hpp" #include "api/select.hpp" #include "api/shuffle_channels.hpp" #include "api/softmax.hpp" @@ -136,6 +137,7 @@ REGISTER_GPU(reverse_sequence); REGISTER_GPU(roi_pooling); REGISTER_GPU(scale); REGISTER_GPU(scatter_update); +REGISTER_GPU(scatter_elements_update); REGISTER_GPU(select); REGISTER_GPU(shuffle_channels); REGISTER_GPU(softmax); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp new file mode 100644 index 00000000000000..06832ecf2c4635 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp @@ -0,0 +1,97 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "scatter_elements_update_inst.h" +#include "primitive_gpu_base.h" +#include "implementation_map.h" +#include "kernel_selector_helper.h" +#include "scatter_update/scatter_elements_update_kernel_selector.h" +#include "scatter_update/scatter_elements_update_kernel_ref.h" +#include "error_handler.h" + +using namespace cldnn; + +namespace cldnn { +namespace gpu { +kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatter_elements_update_axis axis, const scatter_elements_update_node& arg) { + switch (axis) { + case scatter_elements_update::along_x: + return kernel_selector::scatter_update_axis::X; + case scatter_elements_update::along_y: + return kernel_selector::scatter_update_axis::Y; + case scatter_elements_update::along_z: + return kernel_selector::scatter_update_axis::Z; + case scatter_elements_update::along_w: + return kernel_selector::scatter_update_axis::W; + case scatter_elements_update::along_f: + return kernel_selector::scatter_update_axis::FEATURE; + case scatter_elements_update::along_b: + return kernel_selector::scatter_update_axis::BATCH; + default: + CLDNN_ERROR_MESSAGE(arg.id(), "Unsupported Axis"); + } + return kernel_selector::scatter_update_axis::X; +} + +struct scatter_elements_update_gpu : typed_primitive_gpu_impl { + using parent = typed_primitive_gpu_impl; + using parent::parent; + +public: + static primitive_impl* create(const scatter_elements_update_node& arg) { + auto scatter_elements_update_params = get_default_params(arg); + auto scatter_elements_update_optional_params = + get_default_optional_params(arg.get_program()); + + scatter_elements_update_params.axis = convert_axis(arg.get_primitive()->axis, arg); + + scatter_elements_update_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); + scatter_elements_update_params.inputs.push_back(convert_data_tensor(arg.input(2).get_output_layout())); + + auto& kernel_selector = kernel_selector::scatter_elements_update_kernel_selector::Instance(); + auto best_kernels = kernel_selector.GetBestKernels(scatter_elements_update_params, scatter_elements_update_optional_params); + + CLDNN_ERROR_BOOL(arg.id(), + "Best_kernel.empty()", + best_kernels.empty(), + "Cannot find a proper kernel with this arguments"); + + auto scatter_elements_update = new scatter_elements_update_gpu(arg, best_kernels[0]); + + return scatter_elements_update; + } +}; + +namespace detail { + +attach_scatter_elements_update_gpu::attach_scatter_elements_update_gpu() { + auto val_fw = scatter_elements_update_gpu::create; + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); + 
implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw); +} + +} // namespace detail +} // namespace gpu +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 73e93f0a3f199c..7ad1cf87c6e859 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2018-2020 Intel Corporation +// Copyright (c) 2018-2021 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,6 +45,7 @@ #include "space_to_depth_inst.h" #include "gather_inst.h" #include "scatter_update_inst.h" +#include "scatter_elements_update_inst.h" #include "reverse_sequence_inst.h" #include "shuffle_channels_inst.h" #include "space_to_batch_inst.h" @@ -539,6 +540,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type(); + should_fuse |= input_data.is_type(); + should_fuse |= input_data.is_type(); should_fuse |= input_data.is_type(); @@ -601,6 +604,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type(); + should_fuse |= input_data.is_type(); + should_fuse |= input_data.is_type(); should_fuse |= input_data.is_type(); @@ -685,6 +690,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); + should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); + should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); @@ -738,6 +745,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { (parents[i]->is_type()) || (parents[i]->is_type() && eltwise_supports_fusings(parents[i]->as())) || (parents[i]->is_type()) || + (parents[i]->is_type()) || (parents[i]->is_type() && pooling_supports_fusings(parents[i]->as())) || (parents[i]->is_type() && dts_supports_fusings(parents[i]->as())) || (parents[i]->is_type() && reduce_supports_fusings(parents[i]->as())); diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h index 0219319994b0dd..445c6a3e7f1957 100644 --- a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h @@ -84,7 +84,7 @@ struct simple_attached_memory : memory_impl { void unlock() override {} void fill(unsigned char, event_impl::ptr) override {} shared_mem_params get_internal_params() const override { return { shared_mem_type::shared_mem_empty, nullptr, nullptr, nullptr, -#ifdef WIN32 +#ifdef _WIN32 nullptr, #else 0, diff --git a/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h b/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h new file 
mode 100644 index 00000000000000..b3aa5d5fa20da2 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h @@ -0,0 +1,49 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "api/scatter_elements_update.hpp" +#include "primitive_inst.h" +#include + +namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } +}; + +using scatter_elements_update_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + +public: + static layout calc_output_layout(scatter_elements_update_node const& node); + static std::string to_string(scatter_elements_update_node const& node); + +public: + typed_primitive_inst(network_impl& network, scatter_elements_update_node const& desc); +}; + +using scatter_elements_update_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/memory.cpp b/inference-engine/thirdparty/clDNN/src/memory.cpp index 876adcfe484df6..26fb4b32b2a290 100644 --- a/inference-engine/thirdparty/clDNN/src/memory.cpp +++ b/inference-engine/thirdparty/clDNN/src/memory.cpp @@ -32,7 +32,7 @@ memory memory::allocate(const engine& engine, const layout& layout, uint32_t net memory memory::share_buffer(const engine& engine, const layout& layout, shared_handle buf, uint32_t net_id) { shared_mem_params params = { shared_mem_type::shared_mem_buffer, nullptr, nullptr, buf, -#ifdef WIN32 +#ifdef _WIN32 nullptr, #else 0, @@ -43,7 +43,7 @@ memory memory::share_image(const engine& engine, const layout& layout, shared_h memory memory::share_image(const engine& engine, const layout& layout, shared_handle img, uint32_t net_id) { shared_mem_params params = { shared_mem_type::shared_mem_image, nullptr, nullptr, img, -#ifdef WIN32 +#ifdef _WIN32 nullptr, #else 0, @@ -52,7 +52,7 @@ memory memory::share_image(const engine& engine, const layout& layout, shared_ha return memory(engine.get()->reinterpret_handle(layout, &params, net_id).detach()); } -#ifdef WIN32 +#ifdef _WIN32 memory memory::share_surface(const engine& engine, const layout& layout, shared_handle surf, uint32_t plane, uint32_t net_id) { shared_mem_params params = { shared_mem_type::shared_mem_vasurface, nullptr, nullptr, nullptr, surf, plane }; diff --git a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp index 290c5e28efcf82..a3930abe278dcc 100644 --- a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp +++ b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp @@ -90,7 +90,7 @@ memory_impl::ptr memory_pool::get_memory(const layout& layout, const
shared_mem_ params, net_id), false }; return mem_impl; -#ifdef WIN32 +#ifdef _WIN32 } else if (params->mem_type == shared_mem_type::shared_mem_dxbuffer) { memory_impl::ptr mem_impl{ new gpu::gpu_dx_buffer(engine_impl::ptr(_engine), layout, params, diff --git a/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp b/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp new file mode 100644 index 00000000000000..9b68c5ae7dd4d7 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp @@ -0,0 +1,72 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "scatter_elements_update_inst.h" + +#include "primitive_type_base.h" +#include "error_handler.h" +#include "json_object.h" +#include + +namespace cldnn { +primitive_type_id scatter_elements_update::type_id() { + static primitive_type_base instance; + return &instance; +} + +layout scatter_elements_update_inst::calc_output_layout(scatter_elements_update_node const& node) { + auto desc = node.get_primitive(); + + const int32_t axis = desc->axis; + const size_t input_number_of_dims = node.input(0).get_output_layout().size.sizes().size(); + + auto input_layout = node.input(0).get_output_layout(); + + auto output_shape = input_layout.size; + auto input_format = input_layout.format; + auto output_type = input_layout.data_type; + + if (node.has_fused_primitives()) { + output_type = node.get_fused_output_layout().data_type; + } + + if (static_cast(axis) < 0 || static_cast(axis) >= input_number_of_dims) + CLDNN_ERROR_MESSAGE(node.id(), "Incorrect axis value for ScatterElementsUpdate: Axis must be positive and less than the input tensor dimension."); + + return layout{output_type, input_format, output_shape}; +} + +std::string scatter_elements_update_inst::to_string(scatter_elements_update_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + auto& input = node.input(); + + std::stringstream primitive_description; + + json_composite scatter_elements_update_info; + scatter_elements_update_info.add("input id", input.id()); + scatter_elements_update_info.add("axis", desc->axis); + scatter_elements_update_info.add("output shape", node.input(0).get_output_layout().size.to_string()); + + node_info->add("scatter_elements_update info", scatter_elements_update_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +scatter_elements_update_inst::typed_primitive_inst(network_impl& network, scatter_elements_update_node const& node) : parent(network, node) {} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 24b4ca09f4af78..4dfa0cd65a0fa8 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 
2019-2020 Intel Corporation +// Copyright (c) 2019-2021 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,6 +35,7 @@ #include "api/permute.hpp" #include "api/gather.hpp" #include "api/scatter_update.hpp" +#include "api/scatter_elements_update.hpp" #include "api/depth_to_space.hpp" #include "api/space_to_depth.hpp" #include "api/batch_to_space.hpp" @@ -1483,6 +1484,45 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale, bc_test_params{CASE_CONV3D_S8S8_5, 2, 3}, }), ); +class conv_int8_eltwise : public ConvFusingTest {}; +TEST_P(conv_int8_eltwise, fp16_eltwise_out) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("weights", get_mem(get_weights_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())), + convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("scale", {"conv_prim", "scale_data"}, eltwise_mode::prod, data_types::f16), + reorder("reorder_bfyx", "scale", p.default_format, data_types::f32) + ); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_eltwise, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_U8S8_1, 2, 3}, + bc_test_params{CASE_CONV_U8S8_2, 2, 3}, + bc_test_params{CASE_CONV_U8S8_3, 2, 3}, + bc_test_params{CASE_CONV_U8S8_4, 2, 3}, + bc_test_params{CASE_CONV_S8S8_1, 2, 3}, + bc_test_params{CASE_CONV_S8S8_2, 2, 3}, + bc_test_params{CASE_CONV_S8S8_3, 2, 3}, + bc_test_params{CASE_CONV_S8S8_4, 2, 3}, + + bc_test_params{CASE_CONV3D_U8S8_1, 2, 3}, + bc_test_params{CASE_CONV3D_U8S8_2, 2, 3}, + bc_test_params{CASE_CONV3D_U8S8_3, 2, 3}, + bc_test_params{CASE_CONV3D_U8S8_4, 2, 3}, + bc_test_params{CASE_CONV3D_U8S8_5, 2, 3}, + bc_test_params{CASE_CONV3D_S8S8_1, 2, 3}, + bc_test_params{CASE_CONV3D_S8S8_2, 2, 3}, + bc_test_params{CASE_CONV3D_S8S8_3, 2, 3}, + bc_test_params{CASE_CONV3D_S8S8_4, 2, 3}, + bc_test_params{CASE_CONV3D_S8S8_5, 2, 3}, + }), ); + class conv_int8_scale_shift_swish : public ConvFusingTest {}; TEST_P(conv_int8_scale_shift_swish, basic) { auto p = GetParam(); @@ -5743,6 +5783,206 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, scatter_update_scale_activation, scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_5, 2, 4 }, }), ); +class scatter_update_scale_activation_eltwise : public ScatterUpdatePrimitiveFusingTest {}; +TEST_P(scatter_update_scale_activation_eltwise, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scatter_update_indices", get_repeatless_mem(get_indices_layout(p), 0, static_cast(get_axis_dim(p)) - 1)), + data("scatter_update_updates", get_mem(get_updates_layout(p), 0, 1000)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + data("eltw_data", get_mem(layout(p.default_type, p.default_format, p.dictionary_shape))), + scatter_update("scatter_update_prim", "input", "scatter_update_indices", "scatter_update_updates", p.axis), + activation("activation", "scatter_update_prim", activation_func::abs), + eltwise("eltw", {"activation", "eltw_data"}, eltwise_mode::sum, p.default_type), + scale("scale", "eltw", "scale_data"), + reorder("reorder_bfyx", "scale", p.default_format, data_types::f32) + ); + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, scatter_update_scale_activation_eltwise, + ::testing::ValuesIn(std::vector { 
+ scatter_update_test_params{ CASE_SCATTER_UPDATE_FP32_1, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP32_2, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP32_3, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP32_4, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP32_5, 3, 5 }, + + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP16_1, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP16_2, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP16_3, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP16_4, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_FP16_5, 3, 5 }, + + + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP32_1, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP32_2, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP32_3, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP32_4, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP32_5, 3, 5 }, + + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_1, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_2, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_3, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_4, 3, 5 }, + scatter_update_test_params{ CASE_SCATTER_UPDATE_5D_FP16_5, 3, 5 }, + +}), ); + +/* ----------------------------------------------------------------------------------------------------- */ +/* ------------------------------------------ ScatterElementsUpdate cases --------------------------------------------- */ +/* ----------------------------------------------------------------------------------------------------- */ + +struct scatter_elements_update_test_params { + tensor input_shape; + tensor indices_shape; + cldnn::scatter_elements_update::scatter_elements_update_axis axis; + data_types data_type; + format input_format; + data_types default_type; + format default_format; + size_t expected_fused_primitives; + size_t expected_not_fused_primitives; +}; + +// input shape along the update axis should be larger than the total number of elements in the update tensor. +// This is not a limitation of operation itself, but a limitation of test implementation. 
+#define CASE_SCATTER_ELEMENTS_UPDATE_FP32_1 {8, 4, 1, 1}, {2, 4, 1, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_b, data_types::f32, format::bfyx, data_types::f32, format::bfyx +#define CASE_SCATTER_ELEMENTS_UPDATE_FP32_2 {2, 8, 1, 2}, {2, 2, 1, 2}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_f, data_types::f32, format::bfyx, data_types::f32, format::bfyx +#define CASE_SCATTER_ELEMENTS_UPDATE_FP32_3 {2, 3, 10, 10}, {2, 2, 1, 2}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_y, data_types::f32, format::bfyx, data_types::f32, format::bfyx + +#define CASE_SCATTER_ELEMENTS_UPDATE_FP16_1 {2, 2, 14, 12}, {2, 2, 3, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_x, data_types::f16, format::bfyx, data_types::f16, format::bfyx + +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_1 {24, 3, 1, 4, 1}, {4, 3, 1, 2, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_b, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_2 {2, 17, 2, 2, 2}, {1, 2, 2, 2, 2}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_f, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_3 {5, 3, 2, 20, 22}, {5, 1, 1, 2, 2}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_y, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx + +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_1 {13, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_b, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_2 {1, 13, 1, 2, 1}, {1, 2, 1, 2, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_f, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx +#define CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_3 {2, 3, 1, 13, 13}, {2, 3, 1, 2, 1}, cldnn::scatter_elements_update::scatter_elements_update_axis::along_y, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx + +class ScatterElementsUpdatePrimitiveFusingTest : public ::BaseFusingTest { +public: + void execute(scatter_elements_update_test_params& p) { + + auto input_prim = get_mem(get_input_layout(p)); + network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); + network network_fused(this->engine, this->topology_fused, bo_fused); + network_fused.set_input_data("input", input_prim); + network_not_fused.set_input_data("input", input_prim); + compare(network_not_fused, network_fused, p); + } + + layout get_input_layout(scatter_elements_update_test_params& p) { + return layout{ p.data_type, p.input_format, p.input_shape }; + } + + layout get_indices_layout(scatter_elements_update_test_params& p) { + return layout{ p.data_type, p.input_format, p.indices_shape }; + } + + layout get_updates_layout(scatter_elements_update_test_params& p) { + return layout{ p.data_type, p.input_format, p.indices_shape }; + } + + size_t get_axis_dim(scatter_elements_update_test_params& p) { + switch (p.axis) { + case cldnn::scatter_elements_update::scatter_elements_update_axis::along_x: + return p.input_shape.spatial[0]; + case cldnn::scatter_elements_update::scatter_elements_update_axis::along_y: + return p.input_shape.spatial[1]; + case cldnn::scatter_elements_update::scatter_elements_update_axis::along_z: + return p.input_shape.spatial[2]; + case 
cldnn::scatter_elements_update::scatter_elements_update_axis::along_w: + return p.input_shape.spatial[3]; + case cldnn::scatter_elements_update::scatter_elements_update_axis::along_f: + return p.input_shape.feature[0]; + case cldnn::scatter_elements_update::scatter_elements_update_axis::along_b: + return p.input_shape.batch[0]; + default: + return 1; + } + } + + layout get_per_channel_layout(scatter_elements_update_test_params& p) { + return layout{ p.default_type, p.default_format, tensor{1, p.input_shape.feature[0], 1, 1} }; + } +}; + +class scatter_elements_update_quantize : public ScatterElementsUpdatePrimitiveFusingTest {}; +TEST_P(scatter_elements_update_quantize, basic) { + auto p = GetParam(); + const auto &seu = scatter_elements_update("scatter_elements_update_prim", "input", "scatter_elements_update_indices", "scatter_elements_update_updates", p.axis); + const auto &q = quantize("quantize", "scatter_elements_update_prim", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8); + const auto &r = reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32); + create_topologies(input_layout("input", get_input_layout(p)), + data("scatter_elements_update_indices", get_repeatless_mem(get_indices_layout(p), 0, static_cast(get_axis_dim(p)) - 1)), + data("scatter_elements_update_updates", get_mem(get_updates_layout(p), 0, 100)), + data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), -127)), + data("out_hi", get_mem(get_single_element_layout(p), 127)), + seu, + q, + r + ); + tolerance = 1.f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, scatter_elements_update_quantize, + ::testing::ValuesIn(std::vector{ + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_1, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_2, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_3, 2, 3 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP16_1, 2, 3 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_1, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_2, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_3, 2, 3 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_1, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_2, 2, 3 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_3, 2, 3 }, +}), ); + +class scatter_elements_update_scale_activation_eltwise : public ScatterElementsUpdatePrimitiveFusingTest {}; +TEST_P(scatter_elements_update_scale_activation_eltwise, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scatter_elements_update_indices", get_repeatless_mem(get_indices_layout(p), 0, static_cast(get_axis_dim(p)) - 1)), + data("scatter_elements_update_updates", get_mem(get_updates_layout(p), 0, 100)), + data("scale_data", get_mem(get_per_channel_layout(p), -3, 3)), + data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, p.input_shape})), + scatter_elements_update("scatter_elements_update_prim", "input", "scatter_elements_update_indices", "scatter_elements_update_updates", p.axis), + activation("activation", "scatter_elements_update_prim", activation_func::abs), + scale("scale", "activation", 
"scale_data"), + eltwise("eltwise", {"scale", "eltwise_data"}, eltwise_mode::sum, p.data_type), + reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32) + ); + tolerance = 1.0f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, scatter_elements_update_scale_activation_eltwise, + ::testing::ValuesIn(std::vector{ + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_1, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_2, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP32_3, 2, 5 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_FP16_1, 2, 5 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_1, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_2, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP32_3, 2, 5 }, + + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_1, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_2, 2, 5 }, + scatter_elements_update_test_params{ CASE_SCATTER_ELEMENTS_UPDATE_5D_FP16_3, 2, 5 }, +}), ); + /* ------------------------------------------------------------------------------------------------------------ */ /* ---------------------------------------- PERMUTE FUSE cases -------------------------------------------------- */ /* ------------------------------------------------------------------------------------------------------------ */ @@ -6693,7 +6933,7 @@ TEST_P(eltwise_fp32_fused_prims, eltwise_activation) { create_topologies(input_layout("input", get_input_layout(p)), input_layout("input2", get_input_layout2(p)), data("eltwise_data", get_mem(get_input_layout2(p), -10, 10)), - eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type), + eltwise("eltwise1", {"input", "input2"}, p.mode, data_types::f32), eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type), activation("activation", "eltwise2", activation_func::abs), reorder("out", "activation", p.default_format, data_types::f32)); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp index 3a6ea85184bd53..ed303de4cec2bf 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp @@ -35,8 +35,8 @@ #include #include -#ifdef WIN32 -#pragma warning(disable: 4503) +#ifdef _WIN32 +# pragma warning(disable: 4503) #endif using namespace cldnn; @@ -605,13 +605,15 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc if (i == 0) { topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id, hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? cell_id : "", "", - clip_threshold, input_forget, {}, {}, + clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, lstm_output_selection::sequence, default_offset_type)); } else { topology.add(lstm(lstm_id, { prev_lstm_id }, weights_id, recurrent_id, hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? 
cell_id : "", "", - clip_threshold, input_forget, {}, {}, + clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, lstm_output_selection::sequence, default_offset_type)); } prev_lstm_id = lstm_id; @@ -732,7 +734,8 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir topology.add(input_layout("hidden", hidden.get_layout())); topology.add(input_layout("cell", cell.get_layout())); topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent", - "biases", "hidden", "cell", "", 0, false, {}, {}, + "biases", "hidden", "cell", "", 0, false, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, output_selection, default_offset_type)); if (emit_last_cell) { @@ -894,7 +897,8 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { topology.add(input_layout("hidden", hidden.get_layout())); topology.add(input_layout("cell", cell.get_layout())); topology.add(lstm("lstm"+get_string_id(0), lstm_inputs, "weights", "recurrent", - "biases", "hidden", "cell", "", 0, false, {}, {}, + "biases", "hidden", "cell", "", 0, false, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, output_selection, default_offset_type)); if (emit_last_cell) @@ -1066,7 +1070,8 @@ void lstm_gpu_users_test() { topology.add(input_layout("hidden", hidden.get_layout())); topology.add(input_layout("cell", cell.get_layout())); topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent", - "biases", "hidden", "cell", "", 0, false, {}, {}, + "biases", "hidden", "cell", "", 0, false, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, lstm_output_selection::hidden, default_offset_type)); std::vector output_ids_offsets {"lstm", "hidden"}; topology.add(concatenation("concatenation", output_ids_offsets, concatenation::along_f)); @@ -1210,13 +1215,15 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio if (i == 0) { topology.add(lstm(lstm_id, { "input" }, weights_id, recurrent_id, has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "", - clip_threshold, input_forget, {}, {}, + clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, lstm_output_selection::sequence_cell, default_offset_type)); } else { topology.add(lstm(lstm_id, { prev_node_id }, weights_id, recurrent_id, has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "", - clip_threshold, input_forget, {}, {}, + clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, lstm_output_selection::sequence_cell, default_offset_type)); } @@ -1536,7 +1543,8 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id, has_bias ? 
biases_id : "", initial_hidden_id, initial_cell_id, - "", clip_threshold, input_forget, {}, {}, + "", clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, output_selection_per_layer, default_offset_type)); } else @@ -1544,7 +1552,8 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, topology.add(lstm(lstm_id, { output_sequence_ids[layer - 1] }, weights_id, recurrent_id, has_bias ? biases_id : "", initial_hidden_id, initial_cell_id, - "", clip_threshold, input_forget, {}, {}, + "", clip_threshold, input_forget, + { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, output_selection_per_layer, default_offset_type)); } @@ -1734,35 +1743,35 @@ TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32) { // generic_lstm_gpu_test paramters: // layers, sequence, dir, batch, input, hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget -TEST(DISABLED_lstm_gpu, generic_lstm_f32) { +TEST(lstm_gpu, generic_lstm_f32) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_f32) { +TEST(lstm_gpu, generic_lstm_no_bias_f32) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, false, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_hidden_f32) { +TEST(lstm_gpu, generic_lstm_no_hidden_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, false, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_hidden_f32) { +TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, false, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_cell_f32) { +TEST(lstm_gpu, generic_lstm_no_cell_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_cell_f32) { +TEST(lstm_gpu, generic_lstm_no_bias_cell_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_hidden_cell_f32) { +TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, false, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_hidden_cell_f32) { +TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, false, false); } @@ -1770,7 +1779,7 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0); } -TEST(DISABLED_lstm_gpu, generic_lstm_input_forget_f32) { +TEST(lstm_gpu, generic_lstm_input_forget_f32) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1); } @@ -1778,116 +1787,116 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1); } -TEST(DISABLED_lstm_gpu, generic_lstm_offset_order_ifoz_f32) { +TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32) { default_offset_type = lstm_weights_order::ifoz; generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true); default_offset_type = lstm_weights_order::iofz; } -TEST(DISABLED_lstm_gpu, generic_lstm_canonical_f32) { +TEST(lstm_gpu, generic_lstm_canonical_f32) { generic_lstm_gpu_test(1, 1, 1, 1, 1, 1, true, true, true); } // bidirectional support -TEST(DISABLED_lstm_gpu, generic_lstm_bi_f32) { +TEST(lstm_gpu, generic_lstm_bi_f32) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, false, false, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_f32) { +TEST(lstm_gpu, generic_lstm_bi_bias_f32) { generic_lstm_gpu_test(1, 7, 
2, 2, 3, 4, true, false, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_hidden_f32) { +TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, true, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32) { +TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, true, true, true); } // multi-layer support -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_no_seq_f32) { +TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32) { generic_lstm_gpu_test(4, 1, 1, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_seq_f32) { +TEST(lstm_gpu, generic_lstm_stacked_seq_f32) { generic_lstm_gpu_test(4, 7, 1, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_bi_f32) { +TEST(lstm_gpu, generic_lstm_stacked_bi_f32) { generic_lstm_gpu_test(4, 7, 2, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_seq_bi_f32) { +TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) { generic_lstm_gpu_test(4, 7, 2, 3, 3, 2, true, true, true); } // optional outputs support -TEST(DISABLED_lstm_gpu, output_test_sequence_f32) { +TEST(lstm_gpu, output_test_sequence_f32) { lstm_gpu_output_test(lstm_output_selection::sequence, 1); } -TEST(DISABLED_lstm_gpu, output_test_hidden_f32) { +TEST(lstm_gpu, output_test_hidden_f32) { lstm_gpu_output_test(lstm_output_selection::hidden, 1); } -TEST(DISABLED_lstm_gpu, output_test_hidden_cell_f32) { +TEST(lstm_gpu, output_test_hidden_cell_f32) { lstm_gpu_output_test(lstm_output_selection::hidden_cell, 1); } -TEST(DISABLED_lstm_gpu, output_test_sequence_cell_f32) { +TEST(lstm_gpu, output_test_sequence_cell_f32) { lstm_gpu_output_test(lstm_output_selection::sequence_cell, 1); } -TEST(DISABLED_lstm_gpu, output_test_sequence_bi_f32) { +TEST(lstm_gpu, output_test_sequence_bi_f32) { lstm_gpu_output_test(lstm_output_selection::sequence, 2); } -TEST(DISABLED_lstm_gpu, output_test_hidden_bi_f32) { +TEST(lstm_gpu, output_test_hidden_bi_f32) { lstm_gpu_output_test(lstm_output_selection::hidden, 2); } -TEST(DISABLED_lstm_gpu, output_test_hidden_cell_bi_f32) { +TEST(lstm_gpu, output_test_hidden_cell_bi_f32) { lstm_gpu_output_test(lstm_output_selection::hidden_cell, 2); } -TEST(DISABLED_lstm_gpu, output_test_sequence_cell_bi_f32) { +TEST(lstm_gpu, output_test_sequence_cell_bi_f32) { lstm_gpu_output_test(lstm_output_selection::sequence_cell, 2); } // format tests -TEST(DISABLED_lstm_gpu, lstm_gpu_format_bfyx_f32) { +TEST(lstm_gpu, lstm_gpu_format_bfyx_f32) { lstm_gpu_format_test(cldnn::format::bfyx, 1); } -TEST(DISABLED_lstm_gpu, lstm_gpu_format_bfyx_bi_f32) { +TEST(lstm_gpu, lstm_gpu_format_bfyx_bi_f32) { lstm_gpu_format_test(cldnn::format::bfyx, 2); } -TEST(DISABLED_lstm_gpu, lstm_gpu_format_fyxb_f32) { +TEST(lstm_gpu, lstm_gpu_format_fyxb_f32) { lstm_gpu_format_test(cldnn::format::fyxb, 1); } -TEST(DISABLED_lstm_gpu, lstm_gpu_format_fyxb_bi_f32) { +TEST(lstm_gpu, lstm_gpu_format_fyxb_bi_f32) { lstm_gpu_format_test(cldnn::format::fyxb, 2); } // test for LSTM users' dependencies -TEST(DISABLED_lstm_gpu, lstm_users_f32) { +TEST(lstm_gpu, lstm_users_f32) { lstm_gpu_users_test(); } // Test for LSTM with concatenated input -TEST(DISABLED_lstm_gpu, generic_lstm_concatenated_input) { +TEST(lstm_gpu, generic_lstm_concatenated_input) { lstm_gpu_concatenated_input_test(1, 2, 2, 1, 1, 1, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_concatenated_input_multi_layer) { +TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer) { 
lstm_gpu_concatenated_input_test(5, 5, 2, 1, 1, 4, true, true, true); } // test for LSTM with chain and stack (multilayer) -TEST(DISABLED_lstm_gpu, generic_lstm_chained_unidirectional_f32) { +TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32) { // batch size = 1 // input size = 2 // hidden size = 4 @@ -1899,7 +1908,7 @@ TEST(DISABLED_lstm_gpu, generic_lstm_chained_unidirectional_f32) { lstm_gpu_chain_test(1, 2, 4, 1, 1, 2, 1, lstm_output_selection::sequence_cell); } -TEST(DISABLED_lstm_gpu, generic_lstm_chained_bidirectional_f32) { +TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32) { // batch size = 1 // input size = 2 // hidden size = 4 @@ -1911,7 +1920,7 @@ TEST(DISABLED_lstm_gpu, generic_lstm_chained_bidirectional_f32) { lstm_gpu_chain_test(1, 2, 4, 2, 1, 1, 1, lstm_output_selection::sequence_cell); } -TEST(DISABLED_lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) { +TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) { // batch size = 2 // input size = 2 // hidden size = 4 @@ -1923,7 +1932,7 @@ TEST(DISABLED_lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) { lstm_gpu_chain_test(2, 2, 4, 2, 1, 2, 5, lstm_output_selection::sequence_cell); } -TEST(DISABLED_lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32) { +TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32) { // batch size = 2 // input size = 2 // hidden size = 4 @@ -1972,35 +1981,35 @@ TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16) { generic_lstm_elt_gpu_test(1, 1, 4, 6, 3, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_f16) { +TEST(lstm_gpu, generic_lstm_f16) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_f16) { +TEST(lstm_gpu, generic_lstm_no_bias_f16) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, false, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_hidden_f16) { +TEST(lstm_gpu, generic_lstm_no_hidden_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, false, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_hidden_f16) { +TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, false, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_cell_f16) { +TEST(lstm_gpu, generic_lstm_no_cell_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_cell_f16) { +TEST(lstm_gpu, generic_lstm_no_bias_cell_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_hidden_cell_f16) { +TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, true, false, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) { +TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) { generic_lstm_gpu_test(1, 7, 1, 5, 4, 3, false, false, false); } @@ -2008,7 +2017,7 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0); } -TEST(DISABLED_lstm_gpu, generic_lstm_input_forget_f16) { +TEST(lstm_gpu, generic_lstm_input_forget_f16) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1); } @@ -2016,35 +2025,35 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16) { generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1); } -TEST(DISABLED_lstm_gpu, generic_lstm_offset_order_ifoz_f16) { +TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) { default_offset_type = lstm_weights_order::ifoz; generic_lstm_gpu_test(1, 7, 1, 3, 3, 2, true, true, true); 
default_offset_type = lstm_weights_order::iofz; } -TEST(DISABLED_lstm_gpu, generic_lstm_canonical_f16) { +TEST(lstm_gpu, generic_lstm_canonical_f16) { generic_lstm_gpu_test(1, 1, 1, 1, 1, 1, true, true, true); } // bidirectional support -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_f16) { +TEST(lstm_gpu, generic_lstm_bi_bias_f16) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, true, false, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_hidden_f16) { +TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, true, true, false); } -TEST(DISABLED_lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) { +TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) { generic_lstm_gpu_test(1, 7, 2, 2, 3, 4, true, true, true); } // multi-layer support -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_seq_f16) { +TEST(lstm_gpu, generic_lstm_stacked_seq_f16) { generic_lstm_gpu_test(4, 7, 1, 3, 3, 2, true, true, true); } -TEST(DISABLED_lstm_gpu, generic_lstm_stacked_bi_f16) { +TEST(lstm_gpu, generic_lstm_stacked_bi_f16) { generic_lstm_gpu_test(4, 7, 2, 3, 3, 2, true, true, true); } @@ -2052,3 +2061,4 @@ TEST(DISABLED_lstm_gpu, generic_lstm_stacked_bi_f16) { // integration testing using multi-layer and chained LSTMs // LSTMs single input // optional activation list + diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp new file mode 100644 index 00000000000000..433a4791525737 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp @@ -0,0 +1,104 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +/////////////////////////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +#include +#include +#include + +#include +#include + +using namespace cldnn; +using namespace ::tests; + + +TEST(scatter_elements_update_gpu_fp16, d2411_axisF) { + // Dictionary : 2x4x1x1 + // Indexes : 2x2x1x1 + // Updates : 2x2x1x1 + // Axis : 1 + // Output : 2x4x1x1 + // Input values in fp16 + // + // Input: + // 3.f, 6.f, 5.f, 4.f, + // 1.f, 7.f, 2.f, 9.f + // + // Indexes: + // 0.f, 1.f + // 2.f, 3.f + // + // Updates: + // 10.f, 11.f, + // 12.f, 13.f + // + // Output: + // 10.f, 11.f, 5.f, 4.f, + // 1.f, 7.f, 12.f, 13.f + + engine engine; + + auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary + auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Updates + auto axis = cldnn::scatter_elements_update::scatter_elements_update_axis::along_f; + + set_values(input1, { + FLOAT16(3.0f), FLOAT16(6.0f), FLOAT16(5.0f), FLOAT16(4.0f), + FLOAT16(1.0f), FLOAT16(7.0f), FLOAT16(2.0f), FLOAT16(9.0f) + }); + + set_values(input2, { + FLOAT16(0.0f), FLOAT16(1.0f), + FLOAT16(2.0f), FLOAT16(3.0f) + }); + + set_values(input3, { + FLOAT16(10.0f), FLOAT16(11.0f), + FLOAT16(12.0f), FLOAT16(13.0f) + }); + + topology topology; + topology.add(input_layout("InputData", input1.get_layout())); + topology.add(input_layout("InputIndices", input2.get_layout())); + topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add( + scatter_elements_update("scatter_elements_update", "InputData", "InputIndices", "InputUpdates", axis) + ); + + network network(engine, topology); + + network.set_input_data("InputData", input1); + network.set_input_data("InputIndices", input2); + network.set_input_data("InputUpdates", input3); + + auto outputs = network.execute(); + + auto output = outputs.at("scatter_elements_update").get_memory(); + auto output_ptr = output.pointer(); + + std::vector expected_results = { + 10.f, 11.f, 5.f, 4.f, + 1.f, 7.f, 12.f, 13.f + }; + + for (size_t i = 0; i < expected_results.size(); ++i) { + EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); + } +} diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 0d8def33381fe3..5fda5037935e43 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 0d8def33381fe359781b036b1da840178973cb0d +Subproject commit 5fda5037935e43d716eb359edfa6125048e386e7 diff --git a/inference-engine/tools/benchmark_tool/README.md b/inference-engine/tools/benchmark_tool/README.md index 68da2058758cd1..33f45b3a4c9a6f 100644 --- a/inference-engine/tools/benchmark_tool/README.md +++ b/inference-engine/tools/benchmark_tool/README.md @@ -4,6 +4,13 @@ This topic demonstrates how to run the Benchmark Python* Tool, which performs in > **NOTE:** This topic describes usage of Python implementation of the Benchmark Tool. For the C++ implementation, refer to [Benchmark C++ Tool](../../samples/benchmark_app/README.md). +> **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). 
+> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare +> performance of deep learning models on various Intel® architecture +> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components. +>
+> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. + ## How It Works Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend on the mode defined with the `-api` command-line parameter. @@ -129,7 +136,7 @@ If a model has only image input(s), please a provide folder with images or a pat If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input. If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one. -To run the tool, you can use public or Intel's pre-trained models. To download the models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +To run the tool, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). > **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). diff --git a/inference-engine/tools/compile_tool/main.cpp b/inference-engine/tools/compile_tool/main.cpp index 24c0c52935d260..f1094a2b903ec3 100644 --- a/inference-engine/tools/compile_tool/main.cpp +++ b/inference-engine/tools/compile_tool/main.cpp @@ -509,7 +509,7 @@ int main(int argc, char* argv[]) { outputName = getFileNameFromPath(fileNameNoExt(FLAGS_m)) + ".blob"; } - std::ofstream outputFile{outputName}; + std::ofstream outputFile{outputName, std::ios::out | std::ios::binary}; if (!outputFile.is_open()) { std::cout << "Output file " << outputName << " can't be opened for writing" << std::endl; return EXIT_FAILURE; diff --git a/inference-engine/tools/vpu/common/vpu_tools_common.cpp b/inference-engine/tools/vpu/common/vpu_tools_common.cpp index 0fda2b33518942..f7e7ade765f7e0 100644 --- a/inference-engine/tools/vpu/common/vpu_tools_common.cpp +++ b/inference-engine/tools/vpu/common/vpu_tools_common.cpp @@ -3,8 +3,8 @@ // /* on windows min and max already defined that makes using numeric_limits impossible */ -#if defined(WIN32) -#define NOMINMAX +#ifdef _WIN32 +# define NOMINMAX #endif #include diff --git a/inference-engine/tools/vpu/vpu_perfcheck/main.cpp b/inference-engine/tools/vpu/vpu_perfcheck/main.cpp index d2e752582d300a..2c348dcdec2785 100644 --- a/inference-engine/tools/vpu/vpu_perfcheck/main.cpp +++ b/inference-engine/tools/vpu/vpu_perfcheck/main.cpp @@ -38,7 +38,7 @@ static char* m_exename = nullptr; -#if defined(WIN32) || defined(__APPLE__) || defined(ANDROID) +#if defined(_WIN32) || defined(__APPLE__) || defined(ANDROID) typedef std::chrono::time_point time_point; #else typedef std::chrono::time_point time_point; diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index e4080d168e1274..6b9577e8efe8e7 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -71,15 +71,20 
@@ extensions/front/caffe/accum_ext.py extensions/front/caffe/argmax_ext.py extensions/front/caffe/ArgMaxFlatten.py extensions/front/caffe/axpy.py +extensions/front/caffe/batchnorm_ext.py extensions/front/caffe/binarization.py extensions/front/caffe/binary_conv_ext.py extensions/front/caffe/bn.py +extensions/front/caffe/bn_ext.py +extensions/front/caffe/concat_ext.py extensions/front/caffe/conv_ext.py extensions/front/caffe/correlation_ext.py +extensions/front/caffe/crop_ext.py extensions/front/caffe/ctcgreedydecoder_ext.py extensions/front/caffe/CustomLayersMapping.xml.example extensions/front/caffe/data_augmentation_ext.py extensions/front/caffe/detection_output.py +extensions/front/caffe/dropout_ext.py extensions/front/caffe/elementwise_ext.py extensions/front/caffe/eltwise_add_normalize.py extensions/front/caffe/elu.py @@ -90,6 +95,7 @@ extensions/front/caffe/input_ext.py extensions/front/caffe/interp_ext.py extensions/front/caffe/lrn_ext.py extensions/front/caffe/mvn_ext.py +extensions/front/caffe/MVNNormalizer.py extensions/front/caffe/normalize_ext.py extensions/front/caffe/permute_ext.py extensions/front/caffe/pooling_ext.py @@ -106,6 +112,8 @@ extensions/front/caffe/relu_ext.py extensions/front/caffe/reorgyolo_ext.py extensions/front/caffe/resample_ext.py extensions/front/caffe/reshape.py +extensions/front/caffe/roipooling_ext.py +extensions/front/caffe/scale_ext.py extensions/front/caffe/shufflechannel_ext.py extensions/front/caffe/sigmoid.py extensions/front/caffe/simplernms_ext.py @@ -224,6 +232,7 @@ extensions/front/mxnet/ssd_pattern_remove_transpose.py extensions/front/mxnet/ssd_reorder_detection_out_inputs.py extensions/front/mxnet/stack_ext.py extensions/front/mxnet/swapaxis_ext.py +extensions/front/mxnet/take_ext.py extensions/front/mxnet/tile_ext.py extensions/front/mxnet/tile_replacer.py extensions/front/mxnet/transpose_ext.py @@ -617,6 +626,7 @@ extensions/ops/axpy.py extensions/ops/BatchNormInference.py extensions/ops/binarization.py extensions/ops/BlockLSTM.py +extensions/ops/BN.py extensions/ops/box_nms.py extensions/ops/bucketize.py extensions/ops/Cast.py @@ -757,12 +767,7 @@ mo/front/caffe/collect_attributes.py mo/front/caffe/custom_layers_mapping.py mo/front/caffe/extractor.py mo/front/caffe/extractors/__init__.py -mo/front/caffe/extractors/batchnorm.py -mo/front/caffe/extractors/concat.py -mo/front/caffe/extractors/crop.py mo/front/caffe/extractors/native_caffe.py -mo/front/caffe/extractors/roipooling.py -mo/front/caffe/extractors/scale.py mo/front/caffe/extractors/tile.py mo/front/caffe/extractors/utils.py mo/front/caffe/loader.py diff --git a/model-optimizer/extensions/back/compress_quantized_weights.py b/model-optimizer/extensions/back/compress_quantized_weights.py index 0c2a2f4c7ef8e6..8c1e3d93fccafa 100644 --- a/model-optimizer/extensions/back/compress_quantized_weights.py +++ b/model-optimizer/extensions/back/compress_quantized_weights.py @@ -19,72 +19,84 @@ import numpy as np from extensions.ops.Cast import Cast +from extensions.ops.elementwise import Sub, Div, Mul, Negative from mo.back.replacement import BackReplacementPattern from mo.graph.graph import Graph, Node -from mo.middle.passes.convert_data_type import data_type_str_to_np +from mo.middle.passes.convert_data_type import data_type_str_to_np, np_data_type_to_destination_type from mo.ops.const import Const class CompressQuantizeWeights(BackReplacementPattern): """ - Allows to store constant weights as uint8 data type instead fp32. - The structure of pattern without Data nodes. 
- Detects pattern: - ------------------------------------------ - | fp32_weights ---> Initial_FakeQuantize | - ------------------------------------------ - - But actually it looks like: - - --------------------------------------------------------------------------- - | | - | | - | initial_input_low initial_input_high | - | \ / | - | \ / | - | (in: 1) (in: 2) | - | V V | - | fp32_weights ----> Initial_FakeQuantize | - | ^ ^ | - | (in: 3) (in: 4) | - | / \ | - | / \ | - | initial_output_low initial_output_high | - | | - | | - --------------------------------------------------------------------------- - - And transforms it to: - - ------------------------------------------------------------------------------------------------------------- - | | - | initial_input_low initial_input_high initial_output_low initial_output_high | - | \ / | / | - | \ / | / | - | (in: 1) (in: 2) (in: 3) (in: 4) | - | V V V V | - | fp32_weights ----> FakeQuantize ----> Convert (to fp32) ----> Initial_FakeQuantize | - | (with int8 output type) ^ ^ | - | ^ ^ (in: 1) (in: 2) | - | (in: 3) (in: 4) | | | - | | \ ------------------ | | - | | \ / | | - | output_low output_high | | - | (0) (levels - 1) | | - | | | | - | | | | - | ------------------------------------------------------------- | - | | - | | - | | - ------------------------------------------------------------------------------------------------------------- - - Initial_FakeQuantize will restore original fp32 values during inference. - - After value propagation the sub-graph will look like: - - uint8_weights ---> Convert (to fp32) ---> Initial_FakeQuantize - + Compress weights transformation goal is to pre-quantize data to minimize runtime calculations with constant data. + To achieve this goal we perform FakeQuantize decomposition to separate quantization from dequantization in it. + + FakeQuantize: + -[src_dtype]-> FakeQuantize -[src_dtype]-> + is an operation that could be represented as: + -[src_dtype]-> Quantize -[quantized_dtype]-> Dequantize -[src_dtype]-> + + Quantize and Dequantize operations are not present in OpenVINO supported opsets, but can be easily expressed + through supported ones. Transformation algorithm doesn't contain all the steps described + below (some of them are optimized). Steps are presented only to show the idea in details. + + Step 1: FQ decomposition + -[src_dtype]-> Quantize -[quantized_dtype]-> Dequantize -[src_dtype]-> + + Step 2: Representing Quantize and Dequantize through FakeQuantize and Convert operations + Simplified view: + -[src_dtype]-> FakeQuantize -[src_dtype]-> Convert -[quantized_dtype]-> Convert -[src_dtype]-> FakeQuantize -[quantized_dtype]-> + + Detailed view: + initial_input_low initial_input_high initial_output_low initial_output_high + \ / | / + (in: 1) (in: 2) (in: 3) (in: 4) + V V V V + Constant -> FakeQuantize` --> Convert --> Convert --> initial FakeQuantize --> + ^ ^ (quant_dtype) (src_dtype) ^ ^ + | | (in: 1) (in: 2) + (in: 3) (in: 4) | | + | \________________ _________________| | + | \ / | + new_output_low new_output_high | + -(levels // 2) (levels + new_output_low - 1) | + |__________________________________________________________________| + + Step 3: All inputs of initial FQ are Constants and we haven't added dynamic dependencies. Means we can const-fold + sub-graph we already have, but as our goal is to have quantized data, we should mark nodes to be folded. 
+ + -[src_dtype]-> FakeQuantize -[src_dtype]-> Convert -[quantized_dtype]-> Convert -[src_dtype]-> FakeQuantize -[src_dtype]-> + |-------------------------Const Folding-------------------------------|----------------------Stays----------------------------| + + Resulting graph: + Constant -[quantized_dtype]-> Convert -[src_dtype]-> FakeQuantize -[src_dtype]-> + + Step 4: We have reduced heavy manipulations with constant data at runtime, but we can go even further. + At this stage the FakeQuantize node plays a dequantization role: it only shifts and scales the data. + No rounding is performed by this FakeQuantize because the data was fully quantized earlier. + The runtime also calculates this shift (zero point) and scale during the low precision transformation, + so we can pre-calculate this information as well by decomposing the FakeQuantize that plays the + dequantization role into a Subtract-Multiply sequence. The resulting graph is: + Constant -[quantized_dtype]-> Convert -[src_dtype]-> Subtract (zero_point) -> Multiply (scale) -[src_dtype]-> + + Where: + scale = (output_high - output_low) / (input_high - input_low) + WARNING: division by zero imposes the restriction that input_high cannot be equal to input_low + zero_point = input_low - (output_low / scale) + + TODO: steps 5 and 6 are NOT IMPLEMENTED YET + TODO: DOES LPT NEED IT??? + Step 5: Having zero_point == 0 is really beneficial for performance, so we try to fuse the Subtract up into the Constant. + This is not always possible because of the limited range of values representable in quantized_dtype. + + Step 6: (Optional) By the nature of the Subtract and Multiply operations, they may be optimized out when: + zero_point == 0 + scale == 1 + + BENEFITS: + Such constant data packing reduces the IR size (.bin file size). + The transformation also prepares quantized constant data for the Low Precision pipeline, + so the same calculations can be skipped at runtime and such sub-graphs load into the plugin faster.
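+
+    For illustration only, the quantize/dequantize arithmetic from Steps 2 and 4 above can be sketched
+    in NumPy (all names below are illustrative; this is not the transformation code itself):
+
+        import numpy as np
+
+        def fake_quantize(x, il, ih, ol, oh, levels):
+            # reference FakeQuantize semantics: map [il, ih] onto 'levels' steps, then onto [ol, oh]
+            x = np.clip(x, il, ih)
+            return np.round((x - il) / (ih - il) * (levels - 1)) * (oh - ol) / (levels - 1) + ol
+
+        weights = np.array([-1.5, -0.32, 0.167, 2.8], dtype=np.float32)
+        levels, il, ih, ol, oh = 256, -1.5, 2.8, -1.5, 2.8
+
+        # Step 2: quantize to signed integers in [i_min, i_max]
+        i_min = -(levels // 2)              # -128
+        i_max = levels + i_min - 1          # 127
+        q = fake_quantize(weights, il, ih, i_min, i_max, levels).astype(np.int8)
+
+        # Step 4: dequantize with the scale and zero_point defined above
+        scale = (oh - ol) / (i_max - i_min)
+        zero_point = i_min - ol / scale
+        restored = (q.astype(np.float32) - zero_point) * scale
+
+        # 'restored' matches what the original FakeQuantize would produce for 'weights'
+        assert np.allclose(restored, fake_quantize(weights, il, ih, ol, oh, levels))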
""" enabled = True @@ -95,47 +107,110 @@ class CompressQuantizeWeights(BackReplacementPattern): def pattern(self): return dict( nodes=[ - ('weights_const', dict(type='Const')), - ('weights_d', dict(kind='data')), - ('quantize', dict(type='FakeQuantize', levels=lambda x: x is not None and 2 < x <= 256)), + ('const', dict(type='Const')), + ('const_d', dict()), + ('fake_quantize', dict(type='FakeQuantize', levels=lambda x: x is not None and 2 < x <= 256)), ], edges=[ - ('weights_const', 'weights_d'), - ('weights_d', 'quantize', {'in': 0}), + ('const', 'const_d'), + ('const_d', 'fake_quantize', {'in': 0}), ] ) - def replace_pattern(self, graph: Graph, match: Dict[str, Node]): - initial_fake_quantize = match['quantize'] - initial_fake_quantize_name = initial_fake_quantize.soft_get('name', initial_fake_quantize.id) + @staticmethod + def quantize_data(fake_quantize: Node, dst_type: type): + graph = fake_quantize.graph + name = fake_quantize.soft_get('name', fake_quantize.id) + levels = fake_quantize.levels - new_fake_quantize = initial_fake_quantize.copy_node(dict(name=initial_fake_quantize_name + '/Copy', - stop_value_propagation=False), graph) + quantize = fake_quantize.copy_node(dict(name=name + '/Copy', stop_value_propagation=False), graph) + fake_quantize.in_port(0).get_connection().set_destination(quantize.in_port(0)) - initial_fake_quantize.in_port(1).get_connection().set_destination(new_fake_quantize.in_port(1)) - initial_fake_quantize.in_port(2).get_connection().set_destination(new_fake_quantize.in_port(2)) + # inherit input limits + fake_quantize.in_port(1).get_connection().set_destination(quantize.in_port(1)) + fake_quantize.in_port(2).get_connection().set_destination(quantize.in_port(2)) - dst_type = match['weights_const'].value.dtype - if np.issubdtype(dst_type, np.floating): - dst_type = data_type_str_to_np(graph.graph['cmd_params'].data_type) + # calculate output limits for quantized weights + i_min = np.array([-(levels // 2)], dtype=dst_type) + i_max = np.array(levels + i_min - 1, dtype=dst_type) + assert i_max - i_min == levels - 1 + out_low = Const(graph, dict(name=name + '/Copy/out_low', value=i_min)).create_node() + out_high = Const(graph, dict(name=name + '/Copy/out_high', value=i_max)).create_node() + + out_low.out_port(0).connect(quantize.in_port(3)) + out_high.out_port(0).connect(quantize.in_port(4)) + out_low.out_port(0).connect(fake_quantize.in_port(1)) + out_high.out_port(0).connect(fake_quantize.in_port(2)) + + original_const = quantize.in_port(0).get_source().node + quantized_data_name = original_const.soft_get('name', original_const.id) + '/quantized' + cast = Cast(graph, dict(name=quantized_data_name, dst_type=np.int8, stop_value_propagation=False)).create_node() + + quantize.out_port(0).connect(cast.in_port(0)) + + cast.out_port(0).connect(fake_quantize.in_port(0)) + + @staticmethod + def dequantize_data(fake_quantize: Node, dst_type: type) -> Node: + graph = fake_quantize.graph + quantized_data = fake_quantize.in_port(0).get_source().node + name = fake_quantize.soft_get('name', fake_quantize.id) + + assert quantized_data.soft_get('type') == 'Convert' and quantized_data.dst_type == np.int8, \ + 'Weights aren`t compressed as expected for node {}'.format(fake_quantize.soft_get('name', fake_quantize.id)) + + dequantizing_cast = Cast(graph, dict( + name=quantized_data.name + "/to_{}".format(np_data_type_to_destination_type(dst_type)), + dst_type=dst_type, stop_value_propagation=True)).create_node() + 
fake_quantize.in_port(0).get_connection().set_destination(dequantizing_cast.in_port(0)) - i_min = np.array([0.], dtype=dst_type) - i_max = np.array([initial_fake_quantize.levels - 1.], dtype=dst_type) + # limits of dequantize + in_low = fake_quantize.in_port(1).get_source() + in_high = fake_quantize.in_port(2).get_source() + out_low = fake_quantize.in_port(3).get_source() + out_high = fake_quantize.in_port(4).get_source() - new_out_low_node = Const(graph, dict(name=initial_fake_quantize_name + '/Copy/out_low', - value=i_min)).create_node() - new_out_high_node = Const(graph, dict(name=initial_fake_quantize_name + '/Copy/out_high', - value=i_max)).create_node() + # scale calculation + output_range = Sub(graph, {'name': name + '/output_range'}).create_node() + output_range.in_port(0).connect(out_high) + output_range.in_port(1).connect(out_low) - new_out_low_node.out_port(0).connect(new_fake_quantize.in_port(3)) - new_out_high_node.out_port(0).connect(new_fake_quantize.in_port(4)) - new_out_low_node.out_port(0).connect(initial_fake_quantize.in_port(1)) - new_out_high_node.out_port(0).connect(initial_fake_quantize.in_port(2)) + input_range = Sub(graph, {'name': name + '/input_range'}).create_node() + input_range.in_port(0).connect(in_high) + input_range.in_port(1).connect(in_low) - cast_node = Cast(graph, dict(name=initial_fake_quantize_name + "/Convert_to_float", dst_type=dst_type, - stop_value_propagation=True)).create_node() - new_fake_quantize.out_port(0).connect(cast_node.in_port(0)) - initial_fake_quantize.in_port(0).get_connection().set_destination(new_fake_quantize.in_port(0)) - cast_node.out_port(0).connect(initial_fake_quantize.in_port(0)) + scale = Div(graph, {'name': name + '/scale'}).create_node() + scale.in_port(0).connect(output_range.out_port(0)) + scale.in_port(1).connect(input_range.out_port(0)) + + # shift calculation + descaled_output_low = Div(graph, {'name': name + '/descaled_output_low'}).create_node() + descaled_output_low.in_port(0).connect(out_low) + descaled_output_low.in_port(1).connect(scale.out_port(0)) + + shift = Sub(graph, {'name': name + '/zero_point'}).create_node() + shift.in_port(0).connect(in_low) + shift.in_port(1).connect(descaled_output_low.out_port(0)) + + # DeQuantize(x) == Mul(Sub(x, zero_point), scale) + sub_zp = Sub(graph, {'name': name + '/minus_zp'}).create_node() + sub_zp.in_port(0).connect(dequantizing_cast.out_port(0)) + sub_zp.in_port(1).connect(shift.out_port(0)) + + mul_scale = Mul(graph, {'name': name + '/mulpiply_by_scale'}).create_node() + mul_scale.in_port(0).connect(sub_zp.out_port(0)) + mul_scale.in_port(1).connect(scale.out_port(0)) + + fake_quantize.out_port(0).get_connection().set_source(mul_scale.out_port(0)) + + graph.remove_nodes_from([fake_quantize.id, fake_quantize.out_node(0)]) + + def replace_pattern(self, graph: Graph, match: Dict[str, Node]): + fake_quantize = match['fake_quantize'] + + dst_type = match['const'].value.dtype + if np.issubdtype(dst_type, np.floating): + dst_type = data_type_str_to_np(graph.graph['cmd_params'].data_type) - cast_node['force_precision_in_ports'] = {0: 'uint8'} + self.quantize_data(fake_quantize, dst_type) + self.dequantize_data(fake_quantize, dst_type) diff --git a/model-optimizer/extensions/back/compress_quantized_weights_test.py b/model-optimizer/extensions/back/compress_quantized_weights_test.py index 579e6419a872be..7bfab89bdb6520 100644 --- a/model-optimizer/extensions/back/compress_quantized_weights_test.py +++ b/model-optimizer/extensions/back/compress_quantized_weights_test.py @@ -20,788 
+20,127 @@ from generator import generator, generate from extensions.back.compress_quantized_weights import CompressQuantizeWeights +from extensions.ops.Cast import Cast +from extensions.ops.elementwise import Sub, Mul from extensions.ops.fakequantize import FakeQuantize from mo.front.common.partial_infer.eltwise import eltwise_infer -from mo.graph.graph import Node -from mo.ops.const import Const from mo.utils.ir_engine.compare_graphs import compare_graphs -from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, regular_op_with_empty_data, \ - valued_const_with_data, result, connect - -nodes_attributes = { - # placeholder - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': {'kind': 'data'}, - - # weights - 'weights_const': {'type': 'Const', 'kind': 'op', 'value': np.array([], dtype=np.float32), 'op': 'Const'}, - 'weights_data': {'kind': 'data'}, - - # quantize - 'quantize1': {'type': 'FakeQuantize', 'kind': 'op', 'levels': 5, 'op': 'FakeQuantize'}, - 'quantize2': {'type': 'FakeQuantize', 'kind': 'op', 'levels': 2, 'op': 'FakeQuantize'}, - 'quantize3': {'type': 'FakeQuantize', 'kind': 'op', 'levels': None, 'op': 'FakeQuantize'}, - 'quantize4': {'type': 'FakeQuantize', 'kind': 'op', 'levels': 122, 'op': 'FakeQuantize'}, - 'quantize5': {'type': 'FakeQuantize', 'kind': 'op', 'levels': 202, 'op': 'FakeQuantize'}, - 'quantize6': {'type': 'FakeQuantize', 'kind': 'op', 'levels': 257, 'op': 'FakeQuantize'}, - 'quantize_data': {'kind': 'data'}, - 'new_quantize1': {'kind': 'op', 'type': 'FakeQuantize', 'levels': 5, 'op': 'FakeQuantize'}, - 'new_quantize4': {'kind': 'op', 'type': 'FakeQuantize', 'levels': 122, 'op': 'FakeQuantize'}, - 'new_quantize5': {'kind': 'op', 'type': 'FakeQuantize', 'levels': 202, 'op': 'FakeQuantize'}, - 'new_quantize_data': {'kind': 'data'}, - - # quantize input/output - 'output_high_init': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'output_high_init_data': {'kind': 'data', 'value': 3}, - 'output_low_init': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'output_low_init_data': {'kind': 'data', 'value': -1.5}, - - 'input_low': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'input_low_data': {'kind': 'data'}, - 'input_high': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'input_high_data': {'kind': 'data'}, - - 'output_low': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'output_low_data': {'kind': 'data'}, - 'output_high': {'kind': 'op', 'type': 'Const', 'op': 'Const'}, - 'output_high_data': {'kind': 'data'}, - - 'output_high_init_data1': {'kind': 'data', 'value': 256.1}, - 'output_low_init_data1': {'kind': 'data', 'value': 17.3}, - - 'output_high_init_data2': {'kind': 'data', 'value': -0.42}, - 'output_low_init_data2': {'kind': 'data', 'value': -2.573}, - - # eltwise ops - 'mul': {'kind': 'op', 'op': 'Mul'}, - 'add': {'kind': 'op', 'op': 'Add'}, - - 'scale': {'kind': 'op', 'type': 'Const', 'value': 1.125, 'op': 'Const'}, - 'shift': {'kind': 'op', 'type': 'Const', 'value': -1.5, 'op': 'Const'}, - 'scale1': {'kind': 'op', 'type': 'Const', 'value': 1.9735537190082646, 'op': 'Const'}, - 'shift1': {'kind': 'op', 'type': 'Const', 'value': 17.3, 'op': 'Const'}, - 'scale2': {'kind': 'op', 'type': 'Const', 'value': 0.010711442786069652, 'op': 'Const'}, - 'shift2': {'kind': 'op', 'type': 'Const', 'value': -2.573, 'op': 'Const'}, - - 'shift_data': {'kind': 'data'}, - 'scale_data': {'kind': 'data'}, - 'mul_data': {'kind': 'data'}, - 'add_data': {'kind': 'data'}, - - 'convolution': {'type': 'Convolution', 
'kind': 'op', 'op': 'Convolution'}, - 'convert': {'type': 'Convert', 'kind': 'op', 'dst_type': np.float32, 'op': 'Cast'}, - 'convert_data': {'kind': 'data'}, - - 'result_data': {'kind': 'data'}, - 'result': {'kind': 'op', 'op': 'Result'}, - - # accuracy test - 'ac_weights': {'kind': 'op', 'op': 'Const', 'shape': None, 'value': None, 'infer': Const.infer}, - 'ac_weights_data': {'kind': 'data', 'shape': None, 'value': None}, - - 'ac_input_low': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_input_low_data': {'kind': 'data', 'value': None, 'shape': None}, - 'ac_input_high': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_input_high_data': {'kind': 'data', 'value': None, 'shape': None}, - 'ac_output_low': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_output_low_data': {'kind': 'data', 'value': None, 'shape': None}, - 'ac_output_high': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_output_high_data': {'kind': 'data', 'value': None, 'shape': None}, - - 'ac_fakeQuantize': {'kind': 'op', 'type': 'FakeQuantize', 'levels': None, 'infer': FakeQuantize.infer, 'op': 'FakeQuantize'}, - 'ac_fakeQuantize_data': {'kind': 'data', 'shape': None, 'value': None}, - 'ac_quantize': {'kind': 'op', 'type': 'fakeQuantize', 'levels': None, 'infer': FakeQuantize.infer, 'op': 'FakeQuantize'}, - 'ac_quantize_data': {'kind': 'data', 'shape': None, 'value': None}, - - 'ac_convolution': {'kind': 'op', 'type': 'Convolution', 'op': 'Convolution'}, - - 'ac_mul': {'kind': 'op', 'op': 'Mul', 'infer': lambda node: eltwise_infer(node, lambda a, b: a * b)}, - 'ac_mul_data': {'kind': 'data', 'shape': None, 'value': None}, - 'ac_add': {'kind': 'op', 'op': 'Add', 'infer': lambda node: eltwise_infer(node, lambda a, b: a + b)}, - 'ac_add_data': {'kind': 'data', 'shape': None, 'value': None}, - - 'ac_scale': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_scale_data': {'kind': 'data', 'shape': None, 'value': None}, - 'ac_shift': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_shift_data': {'kind': 'data', 'shape': None, 'value': None}, - - 'ac_output_low_ref': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_output_low_ref_data': {'kind': 'data', 'shape': None, 'value': None}, - 'ac_output_high_ref': {'kind': 'op', 'type': 'Const', 'shape': None, 'value': None, 'infer': Const.infer, 'op': 'Const'}, - 'ac_output_high_ref_data': {'kind': 'data', 'shape': None, 'value': None} -} +from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, valued_const_with_data, result, connect, \ + shaped_const_with_data -class WeightQuantizeTest(unittest.TestCase): - - def test_negative_quantize(self): - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'mul'), - ('scale', 'mul'), - ('mul', 'add'), - ('shift', 'add'), - ('add', 'quantize_data'), - ('quantize_data', 'convolution')], - nodes_with_edges_only=True) - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'mul'), - ('scale', 'mul'), - ('mul', 'add'), - ('shift', 'add'), - ('add', 'quantize_data'), - ('quantize_data', 
'convolution')], - nodes_with_edges_only=True) +def nodes_dict(original, transformed=None, levels=255, data=None, il=[-127], ih=[127], ol=[-127], oh=[127]): + shape = [1, 2, 3, 4] if data is None else np.array(data).shape + data = np.ones(shape, dtype=original) if data is None else np.array(data, dtype=original) + int_data = data.astype(dtype=np.int8) + transformed = transformed if transformed is not None else original - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) + return { + **valued_const_with_data('weights', data), + **valued_const_with_data('int_weights', int_data), - def test_negative_quantize_levels_2(self): - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize2', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize2', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize2', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize2', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize2', {'in': 4}), - ('quantize2', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) - - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize2', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize2', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize2', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize2', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize2', {'in': 4}), - ('quantize2', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) + **regular_op_with_shaped_data( + 'cast', shape, {'type': 'Convert', 'op': 'Cast', 'infer': Cast.infer, 'dst_type': transformed}), - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) + **valued_const_with_data('il', np.array(il)), + **valued_const_with_data('ih', np.array(ih)), + **valued_const_with_data('ol', np.array(ol)), + **valued_const_with_data('oh', np.array(oh)), - def test_negative_quantize_levels_257(self): - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize6', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize6', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize6', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize6', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize6', {'in': 4}), - ('quantize6', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) - - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize6', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize6', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize6', {'in': 2}), - ('output_low_init', 
'output_low_init_data'), - ('output_low_init_data', 'quantize6', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize6', {'in': 4}), - ('quantize6', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) + **regular_op_with_shaped_data( + 'FQ', shape, {'type': 'FakeQuantize', 'infer': FakeQuantize.infer, 'stop_value_propagation': True, + 'levels': levels, 'op': 'FakeQuantize'}), - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) + **valued_const_with_data('zp', np.array([0])), + **valued_const_with_data('scale', np.array([1])), - def test_negative_quantize_levels_None(self): - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize3', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize3', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize3', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize3', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize3', {'in': 4}), - ('quantize3', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) - - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize3', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize3', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize3', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize3', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize3', {'in': 4}), - ('quantize3', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1})], - nodes_with_edges_only=True) + **regular_op_with_shaped_data( + 'sub', shape, {'type': 'Subtract', 'op': 'Sub', 'infer': lambda node: eltwise_infer(node, Sub.operation)}), - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) - - def test_positive_quantize1(self): - """ - int8 interval [0; 4] - fp32 interval [-1.5; 3] - """ - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize1', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize1', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize1', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize1', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize1', {'in': 4}), - ('quantize1', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - {'input_low': {'shape': np.array([1]), 'value': -1.5}, - 'input_low_data': {'value': -1.5}, - 'input_high': {'shape': np.array([1]), 'value': 3}, - 'input_high_data': {'value': 3}}, - nodes_with_edges_only=True) - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'new_quantize1', {'in': 0}), - 
('input_low', 'input_low_data'), - ('input_low_data', 'new_quantize1', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'new_quantize1', {'in': 2}), - ('output_low', 'output_low_data'), - ('output_low_data', 'new_quantize1', {'in': 3}), - ('output_high', 'output_high_data'), - ('output_high_data', 'new_quantize1', {'in': 4}), - ('new_quantize1', 'new_quantize_data'), - ('new_quantize_data', 'convert'), - ('convert', 'convert_data'), - ('convert_data', 'quantize1', {'in': 0}), - ('output_low_data', 'quantize1', {'in': 1}), - ('output_high_data', 'quantize1', {'in': 2}), - ('output_low_init', 'output_low_init_data'), - ('output_low_init_data', 'quantize1', {'in': 3}), - ('output_high_init', 'output_high_init_data'), - ('output_high_init_data', 'quantize1', {'in': 4}), - ('quantize1', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - nodes_with_edges_only=True) - - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) + **regular_op_with_shaped_data( + 'mul', shape, {'type': 'Multiply', 'op': 'Mul', 'infer': lambda node: eltwise_infer(node, Mul.operation)}), - def test_positive_quantize2(self): - """ - int8 interval [0; 121] - fp32 interval [17.3; 256.1] - """ - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize4', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize4', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize4', {'in': 2}), - ('output_low_init', 'output_low_init_data1'), - ('output_low_init_data1', 'quantize4', {'in': 3}), - ('output_high_init', 'output_high_init_data1'), - ('output_high_init_data1', 'quantize4', {'in': 4}), - ('quantize4', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - {'input_low': {'shape': np.array([1]), 'value': 17.3}, - 'input_low_data': {'value': 17.3}, - 'input_high': {'shape': np.array([1]), 'value': 256.1}, - 'input_high_data': {'value': 256.1}}, - nodes_with_edges_only=True) - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'new_quantize4', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'new_quantize4', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'new_quantize4', {'in': 2}), - ('output_low', 'output_low_data'), - ('output_low_data', 'new_quantize4', {'in': 3}), - ('output_high', 'output_high_data'), - ('output_high_data', 'new_quantize4', {'in': 4}), - ('new_quantize4', 'new_quantize_data'), - ('new_quantize_data', 'convert'), - ('convert', 'convert_data'), - ('convert_data', 'quantize4', {'in': 0}), - ('output_low_data', 'quantize4', {'in': 1}), - ('output_high_data', 'quantize4', {'in': 2}), - ('output_low_init', 'output_low_init_data1'), - ('output_low_init_data1', 'quantize4', {'in': 3}), - ('output_high_init', 'output_high_init_data1'), - ('output_high_init_data1', 'quantize4', {'in': 4}), - ('quantize4', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - nodes_with_edges_only=True) + **result() +} - 
CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) - def test_positive_quantize3(self): - """ - int8 interval [0; 201] - fp32 interval [-2.573; -0.42] - """ - - graph = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'quantize5', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'quantize5', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'quantize5', {'in': 2}), - ('output_low_init', 'output_low_init_data2'), - ('output_low_init_data2', 'quantize5', {'in': 3}), - ('output_high_init', 'output_high_init_data2'), - ('output_high_init_data2', 'quantize5', {'in': 4}), - ('quantize5', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - {'input_low': {'shape': np.array([1]), 'value': -2.573}, - 'input_low_data': {'value': -2.573}, - 'input_high': {'shape': np.array([1]), 'value': -0.42}, - 'input_high_data': {'value': -0.42}}, - nodes_with_edges_only=True) - graph.graph['cmd_params'] = Namespace(data_type='FP32') - - graph_ref = build_graph(nodes_attributes, - [('weights_const', 'weights_data'), - ('weights_data', 'new_quantize5', {'in': 0}), - ('input_low', 'input_low_data'), - ('input_low_data', 'new_quantize5', {'in': 1}), - ('input_high', 'input_high_data'), - ('input_high_data', 'new_quantize5', {'in': 2}), - ('output_low', 'output_low_data'), - ('output_low_data', 'new_quantize5', {'in': 3}), - ('output_high', 'output_high_data'), - ('output_high_data', 'new_quantize5', {'in': 4}), - ('new_quantize5', 'new_quantize_data'), - ('new_quantize_data', 'convert'), - ('convert', 'convert_data'), - ('convert_data', 'quantize5', {'in': 0}), - ('output_low_data', 'quantize5', {'in': 1}), - ('output_high_data', 'quantize5', {'in': 2}), - ('output_low_init', 'output_low_init_data2'), - ('output_low_init_data2', 'quantize5', {'in': 3}), - ('output_high_init', 'output_high_init_data2'), - ('output_high_init_data2', 'quantize5', {'in': 4}), - ('quantize5', 'quantize_data'), - ('quantize_data', 'convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'convolution', {'in': 0})], - nodes_with_edges_only=True) +class CompressionQuantizeDequantizeSeparateTest(unittest.TestCase): + def test_quantize(self): + original_type = np.float32 + nodes = nodes_dict(original_type) - CompressQuantizeWeights().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, 'convolution', check_op_attrs=True) - self.assertTrue(flag, resp) + graph = build_graph(nodes, [ + *connect('weights:0', '0:FQ'), + *connect('il:0', '1:FQ'), + *connect('ih:0', '2:FQ'), + *connect('ol:0', '3:FQ'), + *connect('oh:0', '4:FQ'), + *connect('FQ:0', 'output'), + ], nodes_with_edges_only=True) - def test_accuracy_tensor1(self): - """ - [1.0, 2.0, 3.0, 4.0] - """ - - graph = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - 
('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result') - ], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([1.0, 2.0, 3.0, 4.0])}, - 'ac_input_low': {'shape': np.array([1]), 'value': 1}, - 'ac_input_high': {'shape': np.array([1]), 'value': 4}, - 'ac_output_low': {'shape': np.array([1]), 'value': 1}, - 'ac_output_high': {'shape': np.array([1]), 'value': 4}, - 'ac_fakeQuantize': {'levels': 256}}, - nodes_with_edges_only=True) - - graph_ref = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_quantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_quantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_quantize', {'in': 2}), - ('ac_output_low_ref', 'ac_output_low_ref_data'), - ('ac_output_low_ref_data', 'ac_quantize', {'in': 3}), - ('ac_output_high_ref', 'ac_output_high_ref_data'), - ('ac_output_high_ref_data', 'ac_quantize', {'in': 4}), - ('ac_quantize', 'ac_quantize_data'), - ('ac_quantize_data', 'ac_mul', {'in': 1}), - ('ac_scale', 'ac_scale_data'), - ('ac_scale_data', 'ac_mul', {'in': 0}), - ('ac_mul', 'ac_mul_data'), - ('ac_mul_data', 'ac_add', {'in': 1}), - ('ac_shift', 'ac_shift_data'), - ('ac_shift_data', 'ac_add', {'in': 0}), - ('ac_add', 'ac_add_data'), - ('ac_add_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result') - ], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([1.0, 2.0, 3.0, 4.0])}, - 'ac_quantize': {'levels': 256}, - 'ac_fakeQuantize': {'levels': 256}, - 'ac_input_low': {'shape': np.array([1]), 'value': 1}, - 'ac_input_high': {'shape': np.array([1]), 'value': 4}, - 'ac_output_low_ref': {'shape': np.array([1]), 'value': 0}, - 'ac_output_high_ref': {'shape': np.array([1]), 'value': 255}, - 'ac_scale': {'shape': np.array([1]), 'value': 0.011764705882352941}, - 'ac_shift': {'shape': np.array([1]), 'value': 1}, - 'ac_output_low': {'shape': np.array([1]), 'value': 1}, - 'ac_output_high': {'shape': np.array([1]), 'value': 4}, - }, - nodes_with_edges_only=True) + error_message = 'Unexpected number of FakeQuantize nodes {} CompressQuantizeWeights.quantize_data call `{}`' + fq_nodes = graph.get_op_nodes(type='FakeQuantize') + self.assertEqual(len(fq_nodes), 1, error_message.format('before', len(fq_nodes))) + fake_quantize = fq_nodes[0] + CompressQuantizeWeights.quantize_data(fake_quantize, original_type) graph.clean_up() - graph_ref.clean_up() - w_array = Node(graph, 'ac_weights').out_port(0).get_destination().data.get_value() - w_array_ref = Node(graph_ref, 'ac_weights').out_port(0).get_destination().data.get_value() - - self.assertTrue(np.all(w_array == w_array_ref)) - - def test_accuracy_tensor2(self): - - """ 
- [-1.5, -0.32, 0.167, 2.8] - """ - - graph = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result') - ], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([-1.5, -0.32, 0.167, 2.8])}, - 'ac_input_low': {'shape': np.array([1]), 'value': -1.5}, - 'ac_input_high': {'shape': np.array([1]), 'value': 2.8}, - 'ac_output_low': {'shape': np.array([1]), 'value': -1.5}, - 'ac_output_high': {'shape': np.array([1]), 'value': 2.8}, - 'ac_fakeQuantize': {'levels': 256}}, - nodes_with_edges_only=True) - - graph_ref = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_quantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_quantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_quantize', {'in': 2}), - ('ac_output_low_ref', 'ac_output_low_ref_data'), - ('ac_output_low_ref_data', 'ac_quantize', {'in': 3}), - ('ac_output_high_ref', 'ac_output_high_ref_data'), - ('ac_output_high_ref_data', 'ac_quantize', {'in': 4}), - ('ac_quantize', 'ac_quantize_data'), - ('ac_quantize_data', 'ac_mul', {'in': 1}), - ('ac_scale', 'ac_scale_data'), - ('ac_scale_data', 'ac_mul', {'in': 0}), - ('ac_mul', 'ac_mul_data'), - ('ac_mul_data', 'ac_add', {'in': 1}), - ('ac_shift', 'ac_shift_data'), - ('ac_shift_data', 'ac_add', {'in': 0}), - ('ac_add', 'ac_add_data'), - ('ac_add_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result') - ], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([-1.5, -0.32, 0.167, 2.8])}, - 'ac_quantize': {'levels': 256}, - 'ac_fakeQuantize': {'levels': 256}, - 'ac_input_low': {'shape': np.array([1]), 'value': -1.5}, - 'ac_input_high': {'shape': np.array([1]), 'value': 2.8}, - 'ac_output_low_ref': {'shape': np.array([1]), 'value': 0}, - 'ac_output_high_ref': {'shape': np.array([1]), 'value': 255}, - 'ac_scale': {'shape': np.array([1]), 'value': 0.016862745098039214}, - 'ac_shift': {'shape': np.array([1]), 'value': -1.5}, - 'ac_output_low': {'shape': np.array([1]), 'value': -1.5}, - 'ac_output_high': {'shape': np.array([1]), 'value': 2.8}, - }, - nodes_with_edges_only=True) + fq_nodes = graph.get_op_nodes(type='FakeQuantize') + 
self.assertEqual(len(fq_nodes), 1, error_message.format('after', len(fq_nodes))) + self.assertEqual(fq_nodes[0].in_port(0).get_source().node.soft_get('type'), 'Const') + self.assertEqual(fq_nodes[0].in_port(0).get_source().node.data_type, np.int8) - graph.clean_up() - graph_ref.clean_up() + graph_ref = build_graph(nodes, [ + *connect('int_weights:0', '0:FQ'), + *connect('il:0', '1:FQ'), + *connect('ih:0', '2:FQ'), + *connect('ol:0', '3:FQ'), + *connect('oh:0', '4:FQ'), + *connect('FQ:0', 'output'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) - w_array = Node(graph, 'ac_weights').out_port(0).get_destination().data.get_value() - w_array_ref = Node(graph_ref, 'ac_weights').out_port(0).get_destination().data.get_value() - - self.assertTrue(np.all(w_array == w_array_ref)) - - def test_accuracy_tensor3(self): - - """ - [-2.586, -1.338, 2.773, 4.414] - """ - - graph = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result')], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([-2.586, -1.338, 2.773, 4.414])}, - 'ac_input_low': {'shape': np.array([1]), 'value': -2.586}, - 'ac_input_high': {'shape': np.array([1]), 'value': 4.414}, - 'ac_output_low': {'shape': np.array([1]), 'value': -2.586}, - 'ac_output_high': {'shape': np.array([1]), 'value': 4.414}, - 'ac_fakeQuantize': {'levels': 256}}, - nodes_with_edges_only=True) - - graph_ref = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_quantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_quantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_quantize', {'in': 2}), - ('ac_output_low_ref', 'ac_output_low_ref_data'), - ('ac_output_low_ref_data', 'ac_quantize', {'in': 3}), - ('ac_output_high_ref', 'ac_output_high_ref_data'), - ('ac_output_high_ref_data', 'ac_quantize', {'in': 4}), - ('ac_quantize', 'ac_quantize_data'), - ('ac_quantize_data', 'ac_mul', {'in': 1}), - ('ac_scale', 'ac_scale_data'), - ('ac_scale_data', 'ac_mul', {'in': 0}), - ('ac_mul', 'ac_mul_data'), - ('ac_mul_data', 'ac_add', {'in': 1}), - ('ac_shift', 'ac_shift_data'), - ('ac_shift_data', 'ac_add', {'in': 0}), - ('ac_add', 'ac_add_data'), - ('ac_add_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 
'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result') - ], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([-2.586, -1.338, 2.773, - 4.414])}, - 'ac_quantize': {'levels': 256}, - 'ac_fakeQuantize': {'levels': 256}, - 'ac_input_low': {'shape': np.array([1]), 'value': -2.586}, - 'ac_input_high': {'shape': np.array([1]), 'value': 4.414}, - 'ac_output_low_ref': {'shape': np.array([1]), 'value': 0}, - 'ac_output_high_ref': {'shape': np.array([1]), 'value': 255}, - 'ac_scale': {'shape': np.array([1]), 'value': 0.027450980392156862}, - 'ac_shift': {'shape': np.array([1]), 'value': -2.586}, - 'ac_output_low': {'shape': np.array([1]), 'value': -2.586}, - 'ac_output_high': {'shape': np.array([1]), 'value': 4.414}, - }, - nodes_with_edges_only=True) + def test_dequantize(self): + original_type = np.float32 + nodes = nodes_dict(original_type, np.int8) - graph.clean_up() - graph_ref.clean_up() + graph = build_graph(nodes, [ + *connect('weights:0', '0:cast'), + *connect('cast:0', '0:FQ'), + *connect('il:0', '1:FQ'), + *connect('ih:0', '2:FQ'), + *connect('ol:0', '3:FQ'), + *connect('oh:0', '4:FQ'), + *connect('FQ:0', 'output'), + ], nodes_with_edges_only=True) - w_array = Node(graph, 'ac_weights').out_port(0).get_destination().data.get_value() - w_array_ref = Node(graph_ref, 'ac_weights').out_port(0).get_destination().data.get_value() - - self.assertTrue(np.all(w_array == w_array_ref)) - - def test_accuracy_tensor4(self): - - eps = np.finfo(np.float32).eps - - graph = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result')], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([1, 1 + eps, - 1 + 2 * eps, 1 + 3 * eps])}, - 'ac_input_low': {'shape': np.array([1]), 'value': 1}, - 'ac_input_high': {'shape': np.array([1]), 'value': 1 + 3 * eps}, - 'ac_output_low': {'shape': np.array([1]), 'value': 1}, - 'ac_output_high': {'shape': np.array([1]), 'value': 1 + 3 * eps}, - 'ac_fakeQuantize': {'levels': 256}}, - nodes_with_edges_only=True) - - graph_ref = build_graph(nodes_attributes, - [('ac_weights', 'ac_weights_data'), - ('ac_weights_data', 'ac_quantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_quantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_quantize', {'in': 2}), - ('ac_output_low_ref', 'ac_output_low_ref_data'), - ('ac_output_low_ref_data', 'ac_quantize', {'in': 3}), - ('ac_output_high_ref', 'ac_output_high_ref_data'), - ('ac_output_high_ref_data', 'ac_quantize', {'in': 4}), - ('ac_quantize', 'ac_quantize_data'), - ('ac_quantize_data', 'ac_mul', {'in': 1}), - ('ac_scale', 'ac_scale_data'), - ('ac_scale_data', 'ac_mul', {'in': 0}), - ('ac_mul', 'ac_mul_data'), - 
('ac_mul_data', 'ac_add', {'in': 1}), - ('ac_shift', 'ac_shift_data'), - ('ac_shift_data', 'ac_add', {'in': 0}), - ('ac_add', 'ac_add_data'), - ('ac_add_data', 'ac_fakeQuantize', {'in': 0}), - ('ac_input_low', 'ac_input_low_data'), - ('ac_input_low_data', 'ac_fakeQuantize', {'in': 1}), - ('ac_input_high', 'ac_input_high_data'), - ('ac_input_high_data', 'ac_fakeQuantize', {'in': 2}), - ('ac_output_low', 'ac_output_low_data'), - ('ac_output_low_data', 'ac_fakeQuantize', {'in': 3}), - ('ac_output_high', 'ac_output_high_data'), - ('ac_output_high_data', 'ac_fakeQuantize', {'in': 4}), - ('ac_fakeQuantize', 'ac_fakeQuantize_data'), - ('ac_fakeQuantize_data', 'ac_convolution', {'in': 1}), - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'ac_convolution', {'in': 0}), - ('ac_convolution', 'result_data'), - ('result_data', 'result')], - {'ac_weights': {'shape': np.array([4]), 'value': np.array([1, 1 + eps, - 1 + 2 * eps, 1 + 3 * eps])}, - 'ac_quantize': {'levels': 256}, - 'ac_fakeQuantize': {'levels': 256}, - 'ac_input_low': {'shape': np.array([1]), 'value': 1}, - 'ac_input_high': {'shape': np.array([1]), 'value': 1 + 3 * eps}, - 'ac_output_low_ref': {'shape': np.array([1]), 'value': 0}, - 'ac_output_high_ref': {'shape': np.array([1]), 'value': 255}, - 'ac_scale': {'shape': np.array([1]), 'value': 3 * eps / 255}, - 'ac_shift': {'shape': np.array([1]), 'value': 1}, - 'ac_output_low': {'shape': np.array([1]), 'value': 1}, - 'ac_output_high': {'shape': np.array([1]), 'value': 1 + 3 * eps}, - }, - nodes_with_edges_only=True) + error_message = 'Unexpected number of {} nodes {} CompressQuantizeWeights.dequantize_data call `{}`' + fq_nodes = graph.get_op_nodes(type='FakeQuantize') + cast_nodes = graph.get_op_nodes(name='cast') + self.assertEqual(len(fq_nodes), 1, error_message.format('FakeQuantize', 'before', len(fq_nodes))) + self.assertEqual(len(cast_nodes), 1, error_message.format('Convert', 'before', len(cast_nodes))) + cast_nodes[0]['need_shape_inference'] = True + CompressQuantizeWeights.dequantize_data(fq_nodes[0], original_type) graph.clean_up() - graph_ref.clean_up() - w_array = Node(graph, 'ac_weights').out_port(0).get_destination().data.get_value() - w_array_ref = Node(graph_ref, 'ac_weights').out_port(0).get_destination().data.get_value() + fq_nodes = graph.get_op_nodes(type='FakeQuantize') + self.assertEqual(len(fq_nodes), 0, error_message.format('FakeQuantize', 'after', len(fq_nodes))) - self.assertTrue(np.all(w_array == w_array_ref)) + graph_ref = build_graph(nodes, [ + *connect('int_weights:0', '0:cast'), + *connect('cast:0', '0:sub'), + *connect('zp:0', '1:sub'), + *connect('sub:0', '0:mul'), + *connect('scale:0', '1:mul'), + *connect('mul:0', 'output'), + ], {'cast': {'dst_type': original_type}}, nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) @generator @@ -817,28 +156,48 @@ class CompressionDataTypeTest(unittest.TestCase): ('FP16', np.float32, np.float16), ('FP32', np.float16, np.float32), ('FP16', np.float16, np.float16), - ]) + ]) def test_data_type(self, model_dtype, original, transformed=None): if transformed is None: transformed = original + nodes = nodes_dict(original, transformed) - nodes = { - **valued_const_with_data('weights', np.ones([1, 2, 3, 4], dtype=original)), + graph = build_graph(nodes, [ + *connect('weights:0', '0:FQ'), + *connect('il:0', '1:FQ'), + *connect('ih:0', '2:FQ'), + *connect('ol:0', '3:FQ'), + *connect('oh:0', '4:FQ'), + *connect('FQ:0', 'output'), + ], 
nodes_with_edges_only=True, cli=Namespace(data_type=model_dtype, static_shape=True)) + + CompressQuantizeWeights().find_and_replace_pattern(graph) + graph.clean_up() + + graph_ref = build_graph(nodes, [ + *connect('int_weights:0', '0:cast'), + *connect('cast:0', '0:sub'), + *connect('zp:0', '1:sub'), + *connect('sub:0', '0:mul'), + *connect('scale:0', '1:mul'), + *connect('mul:0', 'output'), + ], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) - **valued_const_with_data('int_weights', np.ones([1, 2, 3, 4], dtype=np.uint8)), - **regular_op_with_shaped_data('cast', [1, 2, 3, 4], {'type': 'Convert', 'dst_type': transformed, - 'op': 'Cast'}), - **valued_const_with_data('il', np.array([0])), - **valued_const_with_data('ih', np.array([254])), - **valued_const_with_data('ol', np.array([0])), - **valued_const_with_data('oh', np.array([254])), +@generator +class AccuracyCheckFP32Test(unittest.TestCase): + eps = np.finfo(np.float32).eps - **regular_op_with_shaped_data('FQ', [1, 2, 3, 4], {'type': 'FakeQuantize', 'infer': FakeQuantize.infer, - 'stop_value_propagation': True, 'levels': 255, - 'op': 'FakeQuantize'}), - **result() - } + @generate(*[ + ([-2.586, -1.338, 2.773, 4.414], [-2.586], [4.414], [-2.586], [4.414], 256), + ([-1.5, -0.32, 0.167, 2.8], [-1.5], [2.8], [-1.5], [2.8], 256), + ([1, 1 + eps, 1 + 2 * eps, 1 + 3 * eps], [1], [1 + 3 * eps], [1], [1 + 3 * eps], 256), + ([1.0, 2.0, 3.0, 4.0], [1], [4], [1], [4], 256), + ]) + def test_accuracy(self, data, in_low, in_high, out_low, out_high, levels): + nodes = nodes_dict(np.float32, None, levels, data, in_low, in_high, out_low, out_high) graph = build_graph(nodes, [ *connect('weights:0', '0:FQ'), @@ -847,19 +206,57 @@ def test_data_type(self, model_dtype, original, transformed=None): *connect('ol:0', '3:FQ'), *connect('oh:0', '4:FQ'), *connect('FQ:0', 'output'), - ], nodes_with_edges_only=True, cli=Namespace(data_type=model_dtype, static_shape=True)) + ], nodes_with_edges_only=True) + graph_ref = graph.copy() CompressQuantizeWeights().find_and_replace_pattern(graph) + + for node in graph.get_op_nodes() + graph_ref.get_op_nodes(): + node['stop_value_propagation'] = False + node['need_shape_inference'] = node.soft_get('need_shape_inference', True) + graph.clean_up() + graph_ref.clean_up() - graph_ref = build_graph(nodes, [ - *connect('int_weights:0', '0:cast'), - *connect('cast:0', '0:FQ'), + const_result_graph = build_graph({**shaped_const_with_data('weights', np.array(data).shape), **result()}, + [*connect('weights', 'output')], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, const_result_graph, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + (flag, resp) = compare_graphs(graph_ref, const_result_graph, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + # as this two graphs calculated the same data through different constant folding functions, they resulted in + # constants of different data type since FakeQuantize always have f32 output dtype, but eltwises use numpy + # for folding which doesn't have such restriction + const_node = graph.get_op_nodes(type='Const') + self.assertEqual(len(const_node), 1) + if const_node[0].data_type == np.float64: + const_node[0].data_type = np.float32 + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + # I would like to leave this commented code here to quickly check the actual output value: + # 
print(result_node.in_port(0).data.get_value()) # actual calculated value + + +@generator +class NegativeCompressionTestLevels(unittest.TestCase): + @generate(*[(2), (257), (None), (0), (-5)]) + def test_negative_fq_unacceptable_levels(self, levels): + nodes = nodes_dict(np.float32, None, levels) + + graph = build_graph(nodes, [ + *connect('weights:0', '0:FQ'), *connect('il:0', '1:FQ'), *connect('ih:0', '2:FQ'), *connect('ol:0', '3:FQ'), *connect('oh:0', '4:FQ'), *connect('FQ:0', 'output'), ], nodes_with_edges_only=True) + graph_ref = graph.copy() + CompressQuantizeWeights().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/GeLUMerger_Erf.py b/model-optimizer/extensions/front/GeLUMerger_Erf.py index 62dee8985840cc..d95fe2c8dff8f6 100644 --- a/model-optimizer/extensions/front/GeLUMerger_Erf.py +++ b/model-optimizer/extensions/front/GeLUMerger_Erf.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2017-2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,26 +19,26 @@ from math import sqrt, fabs from extensions.ops.gelu import GeLUOP -from mo.front.common.replacement import FrontReplacementSubgraph -from mo.graph.graph import Graph +from mo.front.common.replacement import FrontReplacementPattern +from mo.graph.graph import Graph, rename_nodes +from mo.middle.pattern_match import apply_pattern -class GeLUMergerErf(FrontReplacementSubgraph): +class GeLUMergerErf(FrontReplacementPattern): enabled = True - def pattern(self): - log.info('Enabled GeLU Merger replacement for approximation with Erf') + def pattern1(self): + # (0.5 * x) * (1 + erf(x / sqrt(2)) return dict( nodes=[ - ('mul', dict(op='Mul')), + ('mul', dict(op='Mul')), ('mul0', dict(op='Mul')), - ('div', dict(op='Div')), - ('erf', dict(op='Erf')), - ('add', dict(op='Add')), + ('div', dict(op='Div')), + ('erf', dict(op='Erf')), + ('add', dict(op='Add')), ('mul_param', dict(op='Const')), ('div_param', dict(op='Const')), ('add_param', dict(op='Const')), - ], edges=[ ('mul', 'mul0'), @@ -50,13 +50,67 @@ def pattern(self): ('add_param', 'add'), ]) - def replace_sub_graph(self, graph: Graph, match: dict): + def pattern2(self): + # 0.5 * (x * (1 + erf(x / sqrt(2))) + return dict( + nodes=[ + ('mul', dict(op='Mul')), + ('mul0', dict(op='Mul')), + ('div', dict(op='Div')), + ('erf', dict(op='Erf')), + ('add', dict(op='Add')), + ('mul_param', dict(op='Const')), + ('div_param', dict(op='Const')), + ('add_param', dict(op='Const')), + ], + edges=[ + ('div', 'erf'), + ('erf', 'add'), + ('add', 'mul'), + ('mul', 'mul0'), + ('mul_param', 'mul0'), + ('div_param', 'div'), + ('add_param', 'add'), + ]) + + def pattern3(self): + # x * (0.5 * (1 + erf(x / sqrt(2))) + return dict( + nodes=[ + ('mul', dict(op='Mul')), + ('mul0', dict(op='Mul')), + ('div', dict(op='Div')), + ('erf', dict(op='Erf')), + ('add', dict(op='Add')), + ('mul_param', dict(op='Const')), + ('div_param', dict(op='Const')), + ('add_param', dict(op='Const')), + ], + edges=[ + ('div', 'erf'), + ('erf', 'add'), + ('add', 'mul'), + ('mul', 'mul0'), + ('mul_param', 'mul'), + ('div_param', 'div'), + ('add_param', 'add'), + ]) + + def find_and_replace_pattern(self, graph: Graph): + log.info('Enabled GeLU Merger replacement for approximation with Erf') + apply_pattern(graph, **self.pattern1(), action=self.replace_gelu) + 
apply_pattern(graph, **self.pattern2(), action=self.replace_gelu) + apply_pattern(graph, **self.pattern3(), action=self.replace_gelu) + + def replace_gelu(self, graph: Graph, match: dict): # Gaussian Error Linear Unit # f(x) = 0.5 * x * (1 + erf(x / sqrt(2)) + out_node = match['mul0'] + node_name = out_node.soft_get('name', out_node.id) div = match['div'] - inp_port = div.in_port(0).get_source() - inp = inp_port.node - log.debug('Found potential Erf-based GeLU pattern after {} with name {}'.format(inp.op, inp.name)) + inp_node = div.in_port(0).get_source().node + inp_name = inp_node.soft_get('name', out_node.id) + log.debug('Found potential Erf-based GeLU pattern after {} with name {}'.format(inp_node.op, inp_name)) # take the values of the mul, add and div div_param = match['div_param'] @@ -71,7 +125,8 @@ def replace_sub_graph(self, graph: Graph, match: dict): sqrt2 = sqrt(2.0) # check that the values match the approximation if fabs(div_param - sqrt2) < 1e-06 and mul_param == 0.5 and add_param == 1.0: - log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp.op, inp.name)) - gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node() - inp_port.connect(gelu.in_port(0)) - match['mul0'].out_port(0).get_connection().set_source(gelu.out_port(0)) + log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp_node.op, inp_name)) + gelu = GeLUOP(graph, dict(name=inp_name + '/GELU_')).create_node() + div.in_port(0).get_connection().set_destination(gelu.in_port(0)) + out_node.out_port(0).get_connection().set_source(gelu.out_port(0)) + rename_nodes([(out_node, node_name + '/TBD'), (gelu, node_name)]) diff --git a/model-optimizer/extensions/front/GeLUMerger_Erf_test.py b/model-optimizer/extensions/front/GeLUMerger_Erf_test.py new file mode 100644 index 00000000000000..8f3a4ebca3d75e --- /dev/null +++ b/model-optimizer/extensions/front/GeLUMerger_Erf_test.py @@ -0,0 +1,117 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import unittest +from math import sqrt + +from extensions.front.GeLUMerger_Erf import GeLUMergerErf +from mo.front.common.partial_infer.utils import float_array, int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph, const, regular_op, result, build_graph + +ref_nodes = {**regular_op('input', {'type': 'Parameter'}), + **regular_op('gelu', {'type': 'Gelu', 'name': 'final_mul'}), + **result('result') + } +ref_edges = [('input', 'gelu'), ('gelu', 'result')] + + +class GeLUMergerErfTest(unittest.TestCase): + nodes = { + **regular_op('input', {'op': 'Parameter', 'type': 'Parameter'}), + **regular_op('mul', {'op': 'Mul'}), + **regular_op('mul0', {'op': 'Mul', 'name': 'final_mul'}), + **regular_op('div', {'op': 'Div'}), + **regular_op('erf', {'op': 'Erf'}), + **regular_op('add', {'op': 'Add'}), + **const('mul_param', float_array([0.5])), + **const('div_param', float_array([sqrt(2.)])), + **const('add_param', int64_array([1])), + **result('result'), + } + + def test_gelu_p1(self): + edges = [('input', 'mul'), + ('mul', 'mul0'), + ('input', 'div'), + ('div', 'erf'), + ('erf', 'add'), + ('add', 'mul0'), + ('mul_param', 'mul'), + ('div_param', 'div'), + ('add_param', 'add'), + ('mul0', 'result')] + + graph = build_graph(self.nodes, edges) + + graph_ref = build_graph(ref_nodes, ref_edges) + graph.stage = 'front' + + GeLUMergerErf().find_and_replace_pattern(graph) + graph.clean_up() + + (flag, resp) = compare_graphs(graph, graph_ref, 'result') + self.assertTrue(flag, resp) + self.assertTrue(len(graph.get_op_nodes(name='final_mul')) == 1 and + graph.get_op_nodes(name='final_mul')[0].op == 'Gelu') + + def test_gelu_p2(self): + edges = [('input', 'mul'), + ('div', 'erf'), + ('erf', 'add'), + ('add', 'mul'), + ('mul', 'mul0'), + ('mul_param', 'mul0'), + ('div_param', 'div'), + ('add_param', 'add'), + ('mul0', 'result')] + + graph = build_graph(self.nodes, edges) + + graph_ref = build_graph(ref_nodes, ref_edges) + graph.stage = 'front' + + GeLUMergerErf().find_and_replace_pattern(graph) + graph.clean_up() + + (flag, resp) = compare_graphs(graph, graph_ref, 'result') + self.assertTrue(flag, resp) + self.assertTrue(len(graph.get_op_nodes(name='final_mul')) == 1 and + graph.get_op_nodes(name='final_mul')[0].op == 'Gelu') + + def test_gelu_p3(self): + edges = [('input', 'mul'), + ('div', 'erf'), + ('erf', 'add'), + ('add', 'mul'), + ('mul', 'mul0'), + ('mul_param', 'mul'), + ('div_param', 'div'), + ('add_param', 'add'), + ('mul0', 'result')] + + graph = build_graph(self.nodes, edges) + + graph_ref = build_graph(ref_nodes, ref_edges) + graph.stage = 'front' + + GeLUMergerErf().find_and_replace_pattern(graph) + graph.clean_up() + + (flag, resp) = compare_graphs(graph, graph_ref, 'result') + self.assertTrue(flag, resp) + self.assertTrue(len(graph.get_op_nodes(name='final_mul')) == 1 and + graph.get_op_nodes(name='final_mul')[0].op == 'Gelu') diff --git a/model-optimizer/extensions/front/LayerNorm.py b/model-optimizer/extensions/front/LayerNorm.py index 7ced6688cc15e5..80f3779b521dab 100644 --- a/model-optimizer/extensions/front/LayerNorm.py +++ b/model-optimizer/extensions/front/LayerNorm.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ import logging as log from mo.front.common.replacement import FrontReplacementSubgraph +from mo.front.tf.graph_utils import create_op_with_const_inputs from mo.graph.graph import Graph, rename_nodes from extensions.ops.mvn import MVN @@ -65,9 +66,9 @@ def replace_sub_graph(self, graph: Graph, match: dict): if add_param.value.size == 1 and pow_param.value.size == 1 and add_param.value.item() <= 1e-05 \ and pow_param.value.item() == 0.5 and match['pool0_param'].value == match['pool1_param'].value: log.debug('Found LayerNorm pattern after {} with name {}'.format(inp_port.node.op, inp_port.node.name)) - mvn = MVN(graph, {'eps': add_param.value.item(), - 'axes': match['pool1_param'].value, - 'normalize_variance': 1}).create_node() + mvn = create_op_with_const_inputs(graph, MVN, {1: match['pool1_param'].value}, + {'eps': add_param.value.item(), 'normalize_variance': 1, + 'eps_mode': 'inside_sqrt'}) div_name = match['div'].soft_get('name', match['div'].id) rename_nodes([(match['div'], div_name + '/to_be_removed'), (mvn, div_name)]) diff --git a/model-optimizer/extensions/front/LayerNorm_test.py b/model-optimizer/extensions/front/LayerNorm_test.py index 5b35acbb8575de..59a6079d4ceaca 100644 --- a/model-optimizer/extensions/front/LayerNorm_test.py +++ b/model-optimizer/extensions/front/LayerNorm_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ import unittest import numpy as np -from math import sqrt from extensions.front.LayerNorm import LayerNorm from mo.utils.ir_engine.compare_graphs import compare_graphs @@ -41,7 +40,8 @@ nodes_attributes_ref = { 'inp': {'kind': 'op', 'op': 'AnyOp'}, - 'mvn': {'kind': 'op', 'op': 'MVN'}, + 'mvn': {'kind': 'op', 'op': 'MVN', 'eps': 1e-6, 'normalize_variance': 1, 'eps_mode': 'inside_sqrt'}, + 'mvn_param': {'kind': 'op', 'op': 'Const'}, 'out': {'kind': 'op', 'op': 'AnyOp'}, } @@ -72,6 +72,7 @@ def test_MVNPatternReplacement_test_1(self): nodes_with_edges_only=True) graph_ref = build_graph(nodes_attributes_ref, [('inp', 'mvn'), + ('mvn_param', 'mvn'), ('mvn', 'out')], {}, nodes_with_edges_only=True) graph.stage = 'front' diff --git a/model-optimizer/extensions/front/caffe/MVNNormalize_test.py b/model-optimizer/extensions/front/caffe/MVNNormalize_test.py new file mode 100644 index 00000000000000..b16f55235adda6 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/MVNNormalize_test.py @@ -0,0 +1,81 @@ +""" + Copyright (C) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import unittest + +import numpy as np + +from extensions.front.caffe.MVNNormalizer import MVNCaffeToMVN +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph, regular_op_with_empty_data, result, const, connect_front + +nodes = { + **regular_op_with_empty_data('input', {'type': 'Parameter'}), + **regular_op_with_empty_data('mvn_caffe', {'op': 'MVNCaffe'}), + **result(), + + # nodes after replacement + **const('start_1', np.array(1)), + **const('start_2', np.array(2)), + **const('step', np.array(1)), + **regular_op_with_empty_data('rank', {'op': 'Rank', 'type': None}), + **regular_op_with_empty_data('range', {'op': 'Range', 'type': None}), + **regular_op_with_empty_data('mvn', {'op': 'MVN', 'type': None}), +} + + +class MVNNormalizerTest(unittest.TestCase): + def test_mvn_normalizer(self): + graph = build_graph(nodes, [('input', 'mvn_caffe'), + ('mvn_caffe', 'output')], + {'mvn_caffe': {'across_channels': 0}}, + nodes_with_edges_only=True) + graph.stage = 'front' + + MVNCaffeToMVN().find_and_replace_pattern(graph) + + graph_ref = build_graph(nodes, [('input', 'mvn', {'out': 0}), + ('input', 'rank', {'out': 0}), + *connect_front('start_2', '0:range'), + *connect_front('rank', '1:range'), + *connect_front('step', '2:range'), + *connect_front('range', '1:mvn'), + ('mvn', 'output')], + nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_mvn_normalizer_across_channels(self): + graph = build_graph(nodes, [('input', 'mvn_caffe'), + ('mvn_caffe', 'output')], + {'mvn_caffe': {'across_channels': 1}}, + nodes_with_edges_only=True) + graph.stage = 'front' + + MVNCaffeToMVN().find_and_replace_pattern(graph) + + graph_ref = build_graph(nodes, [('input', 'mvn', {'out': 0}), + ('input', 'rank', {'out': 0}), + *connect_front('start_1', '0:range'), + *connect_front('rank', '1:range'), + *connect_front('step', '2:range'), + *connect_front('range', '1:mvn'), + ('mvn', 'output')], + nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/caffe/MVNNormalizer.py b/model-optimizer/extensions/front/caffe/MVNNormalizer.py new file mode 100644 index 00000000000000..bd9ebff5c83912 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/MVNNormalizer.py @@ -0,0 +1,55 @@ +""" + Copyright (C) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +from extensions.ops.mvn import MVN +from extensions.ops.range import Range +from extensions.ops.rank import Rank +from mo.front.common.partial_infer.utils import int64_array +from mo.front.common.replacement import FrontReplacementPattern +from mo.front.tf.graph_utils import create_op_with_const_inputs +from mo.graph.graph import Graph, rename_nodes + +import numpy as np + + +class MVNCaffeToMVN(FrontReplacementPattern): + """ + Replace MVNCaffe operation with MVN + """ + enabled = True + + def find_and_replace_pattern(self, graph: Graph): + for node in graph.get_op_nodes(op='MVNCaffe'): + node_name = node.soft_get('name', node.id) + + start_axis = 2 + if node['across_channels'] == 1: + start_axis = 1 + + rank = Rank(graph, {'name': node_name + '/Rank'}).create_node() + + # create range of axes based on `start_axis` and rank of input + rng = create_op_with_const_inputs(graph, Range, {0: int64_array(start_axis), 2: int64_array(1)}, + {'name': node_name + '/Range', 'output_type': np.int64}) + rng.in_port(1).connect(rank.out_port(0)) + + new_mvn = MVN(graph, {'eps': node.soft_get('eps', 1e-9), 'eps_mode': 'inside_sqrt', + 'normalize_variance': node.soft_get('normalize_variance', 1)}).create_node( + [node.in_port(0).get_source().node, rng]) + new_mvn.in_port(0).get_connection().add_destination(rank.in_port(0)) + node.out_port(0).get_connection().set_source(new_mvn.out_port(0)) + rename_nodes([(node, node_name + '/tbd'), (new_mvn, node_name)]) + + graph.remove_node(node.id) diff --git a/model-optimizer/extensions/front/caffe/batchnorm_ext.py b/model-optimizer/extensions/front/caffe/batchnorm_ext.py new file mode 100644 index 00000000000000..419fb738f1ae32 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/batchnorm_ext.py @@ -0,0 +1,53 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import numpy as np + +from extensions.ops.BatchNormInference import BatchNormInference +from mo.front.caffe.extractors.utils import embed_input +from mo.front.extractor import FrontExtractorOp + + +class BatchNormalizationExtractor(FrontExtractorOp): + op = 'batchnorm' + enabled = True + + @classmethod + def extract(cls, node): + eps = node.pb.batch_norm_param.eps + attrs = { + 'eps': eps + } + pb_model = None if not node.soft_get('model_pb', None) else node.model_pb + if pb_model: + blobs = pb_model.blobs + assert len(blobs) >= 2, 'BatchNorm accepts not less then two input blobs' + mean = np.array(blobs[0].data) + variance = np.array(blobs[1].data) + + if len(blobs) == 3: + scale = blobs[2].data[0] + if scale != 0: + scale = 1.0 / scale + mean *= scale + variance *= scale + + embed_input(attrs, 1, 'gamma', np.ones(mean.shape), 'gamma') + embed_input(attrs, 2, 'beta', np.zeros(variance.shape), 'beta') + embed_input(attrs, 3, 'mean', mean, 'biases') + embed_input(attrs, 4, 'variance', variance, 'weights') + + BatchNormInference.update_node_stat(node, attrs) + return cls.enabled diff --git a/model-optimizer/extensions/front/caffe/bn.py b/model-optimizer/extensions/front/caffe/bn.py index 3ad77c441c512e..4aa3cdc34323fa 100644 --- a/model-optimizer/extensions/front/caffe/bn.py +++ b/model-optimizer/extensions/front/caffe/bn.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ class BNToScaleShift(FrontReplacementOp): """ Replaces BN layer with ScaleShift. """ - op = "batchNormInference" + op = "BN" enabled = True def replace_op(self, graph: Graph, node: Node): @@ -35,6 +35,7 @@ def replace_op(self, graph: Graph, node: Node): param = graph.node[node.id]['pb'].bn_param pb_model = graph.node[node.id]['model_pb'] + blobs = pb_model.blobs if len(blobs) != 4: diff --git a/model-optimizer/mo/front/caffe/extractors/concat.py b/model-optimizer/extensions/front/caffe/bn_ext.py similarity index 64% rename from model-optimizer/mo/front/caffe/extractors/concat.py rename to model-optimizer/extensions/front/caffe/bn_ext.py index 1c7afba15e77e0..39f83be46065f7 100644 --- a/model-optimizer/mo/front/caffe/extractors/concat.py +++ b/model-optimizer/extensions/front/caffe/bn_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,12 +14,15 @@ limitations under the License. 
""" -from mo.front.common.partial_infer.concat import concat_infer +from extensions.ops.BN import BN +from mo.front.extractor import FrontExtractorOp -def concat_ext(pb_layer, pb_model): - return { - 'type': "Concat", - 'axis': pb_layer.concat_param.axis, - 'infer': concat_infer - } +class BNExtractor(FrontExtractorOp): + op = 'BN' + enabled = True + + @classmethod + def extract(cls, node): + BN.update_node_stat(node, {}) + return cls.enabled diff --git a/model-optimizer/extensions/front/caffe/bn_test.py b/model-optimizer/extensions/front/caffe/bn_test.py index 37c0c3fa5b5cd4..c6be7929e05463 100644 --- a/model-optimizer/extensions/front/caffe/bn_test.py +++ b/model-optimizer/extensions/front/caffe/bn_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. """ -import unittest - import numpy as np +import unittest from extensions.front.caffe.bn import BNToScaleShift from mo.graph.graph import Node @@ -47,7 +46,7 @@ def test_bn(self): FakeParam('data', shift)]) nodes = [ ('input', {'kind': 'op', 'type': 'Identity', 'op': 'Identity'}), - ('bn', {'type': None, 'kind': 'op', 'op': 'batchNormInference', 'pb': bn_pb, 'model_pb': bn_bin}), + ('bn', {'type': None, 'kind': 'op', 'op': 'BN', 'pb': bn_pb, 'model_pb': bn_bin}), ('output', {'kind': 'op', 'type': 'Identity', 'op': 'Identity'}), ] edges = [ diff --git a/model-optimizer/extensions/front/caffe/concat_ext.py b/model-optimizer/extensions/front/caffe/concat_ext.py new file mode 100644 index 00000000000000..abbc85cd390f99 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/concat_ext.py @@ -0,0 +1,32 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from mo.front.extractor import FrontExtractorOp +from mo.ops.concat import Concat + + +class ConcatFrontExtractor(FrontExtractorOp): + op = 'concat' + enabled = True + + @classmethod + def extract(cls, node): + pb = node.pb + mapping_rule = { + 'axis': pb.concat_param.axis, + } + Concat.update_node_stat(node, mapping_rule) + return cls.enabled diff --git a/model-optimizer/mo/front/caffe/extractors/crop.py b/model-optimizer/extensions/front/caffe/crop_ext.py similarity index 96% rename from model-optimizer/mo/front/caffe/extractors/crop.py rename to model-optimizer/extensions/front/caffe/crop_ext.py index 55957e56869525..073e286b4330f0 100644 --- a/model-optimizer/mo/front/caffe/extractors/crop.py +++ b/model-optimizer/extensions/front/caffe/crop_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/model-optimizer/mo/front/caffe/extractors/crop_test.py b/model-optimizer/extensions/front/caffe/crop_ext_test.py similarity index 94% rename from model-optimizer/mo/front/caffe/extractors/crop_test.py rename to model-optimizer/extensions/front/caffe/crop_ext_test.py index 6476aabac3cb28..755c0c066d1ae2 100644 --- a/model-optimizer/mo/front/caffe/extractors/crop_test.py +++ b/model-optimizer/extensions/front/caffe/crop_ext_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ import unittest from unittest.mock import patch -from mo.front.caffe.extractors.crop import CropFrontExtractor +from extensions.front.caffe.crop_ext import CropFrontExtractor from mo.front.common.partial_infer.crop import crop_infer from mo.ops.crop import Crop from mo.ops.op import Op diff --git a/model-optimizer/mo/front/caffe/extractors/roipooling.py b/model-optimizer/extensions/front/caffe/dropout_ext.py similarity index 57% rename from model-optimizer/mo/front/caffe/extractors/roipooling.py rename to model-optimizer/extensions/front/caffe/dropout_ext.py index fd7fc4edfedd18..2737986e83155b 100644 --- a/model-optimizer/mo/front/caffe/extractors/roipooling.py +++ b/model-optimizer/extensions/front/caffe/dropout_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,15 +14,16 @@ limitations under the License. """ -from mo.front.common.partial_infer.roipooling import roipooling_infer +from extensions.ops.identity import Identity +from mo.front.extractor import FrontExtractorOp +from mo.graph.graph import Node -def roipooling_ext(proto_layer, model_layer): - param = proto_layer.roi_pooling_param - return { - 'type': 'ROIPooling', - 'pooled_h': param.pooled_h, - 'pooled_w': param.pooled_w, - 'spatial_scale': param.spatial_scale, - 'infer': roipooling_infer - } +class DropoutFrontExtractor(FrontExtractorOp): + op = 'dropout' + enabled = True + + @classmethod + def extract(cls, node: Node): + Identity.update_node_stat(node, {}) + return cls.enabled diff --git a/model-optimizer/extensions/front/caffe/mvn_ext.py b/model-optimizer/extensions/front/caffe/mvn_ext.py index f9445fb6ccac11..6b50a786edcc5d 100644 --- a/model-optimizer/extensions/front/caffe/mvn_ext.py +++ b/model-optimizer/extensions/front/caffe/mvn_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -from extensions.ops.mvn import MVN +from extensions.ops.mvn import MVNCaffe from mo.front.caffe.collect_attributes import collect_attributes from mo.front.extractor import FrontExtractorOp @@ -30,5 +30,5 @@ def extract(cls, node): attrs = collect_attributes(param) # update the attributes of the node - MVN.update_node_stat(node, attrs) + MVNCaffe.update_node_stat(node, attrs) return cls.enabled diff --git a/model-optimizer/extensions/front/caffe/pooling_ext.py b/model-optimizer/extensions/front/caffe/pooling_ext.py index 102ed82339bcb0..873e0d38be43df 100644 --- a/model-optimizer/extensions/front/caffe/pooling_ext.py +++ b/model-optimizer/extensions/front/caffe/pooling_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ def extract(cls, node): param = proto_layer.pooling_param method = 'max' - exclude_pad = 'true' + exclude_pad = True kernel = [0, 0] stride = [1, 1] padding = [0, 0] @@ -46,10 +46,10 @@ def extract(cls, node): if param.pool == 0: method = 'max' - exclude_pad = 'true' + exclude_pad = True elif param.pool == 1: method = 'avg' - exclude_pad = 'false' + exclude_pad = False else: raise ValueError('Unknown Pooling Method!') diff --git a/model-optimizer/extensions/front/caffe/pooling_ext_test.py b/model-optimizer/extensions/front/caffe/pooling_ext_test.py index 576896c2342859..c5ada7f7787052 100644 --- a/model-optimizer/extensions/front/caffe/pooling_ext_test.py +++ b/model-optimizer/extensions/front/caffe/pooling_ext_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -36,7 +36,7 @@ def test_pooling_ext_global(self): 'stride': 2, 'pad': 3, 'pool': 0, - 'global_pooling': 1, + 'global_pooling': True, 'ceil_mode': 1 } node = PB({'pb': FakeProtoLayer(FakeMultiParam(params))}) @@ -48,9 +48,9 @@ def test_pooling_ext_global(self): 'pad': np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.int64), 'pad_spatial_shape': np.array([[0, 0], [0, 0]], dtype=np.int64), 'pool_method': 'max', - 'exclude_pad': 'true', + 'exclude_pad': True, 'infer': Pooling.infer, - 'global_pool': 1, + 'global_pool': True, 'output_spatial_shape': None, 'pooling_convention': 'full', 'rounding_type': 'ceil' @@ -72,7 +72,7 @@ def test_pooling_ext(self): 'stride': 2, 'pad': 3, 'pool': 1, - 'global_pooling': 0, + 'global_pooling': False, 'ceil_mode': 0 } node = PB({'pb': FakeProtoLayer(FakeMultiParam(params))}) @@ -84,9 +84,9 @@ def test_pooling_ext(self): 'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]], dtype=np.int64), 'pad_spatial_shape': np.array([[3, 3], [3, 3]], dtype=np.int64), 'pool_method': 'avg', - 'exclude_pad': 'false', + 'exclude_pad': False, 'infer': Pooling.infer, - 'global_pool': 0, + 'global_pool': False, 'output_spatial_shape': None, 'pooling_convention': 'valid' } @@ -106,7 +106,7 @@ def test_pooling_ext_exception(self): 'stride': 2, 'pad': 3, 'pool': 3, - 'global_pooling': 1 + 'global_pooling': True } node = PB({'pb': FakeProtoLayer(FakeMultiParam(params))}) self.assertRaises(ValueError, PoolingFrontExtractor.extract, node) diff --git a/model-optimizer/extensions/front/caffe/roipooling_ext.py b/model-optimizer/extensions/front/caffe/roipooling_ext.py new file mode 100644 index 00000000000000..76cbd4fd3925b0 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/roipooling_ext.py @@ -0,0 +1,35 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from mo.front.extractor import FrontExtractorOp +from mo.ops.roipooling import ROIPooling + + +class ROIPoolingFrontExtractor(FrontExtractorOp): + op = 'roipooling' + enabled = True + + @classmethod + def extract(cls, node): + param = node.pb.roi_pooling_param + attrs = { + 'pooled_h': param.pooled_h, + 'pooled_w': param.pooled_w, + 'spatial_scale': param.spatial_scale, + } + + ROIPooling.update_node_stat(node, attrs) + return cls.enabled diff --git a/model-optimizer/extensions/front/caffe/scale_ext.py b/model-optimizer/extensions/front/caffe/scale_ext.py new file mode 100644 index 00000000000000..53c5a1a323b396 --- /dev/null +++ b/model-optimizer/extensions/front/caffe/scale_ext.py @@ -0,0 +1,55 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +""" + +import numpy as np + +from mo.front.caffe.extractors.utils import embed_input, weights_biases +from mo.front.common.partial_infer.elemental import copy_shape_infer +from mo.front.extractor import FrontExtractorOp +from mo.ops.scale_shift import ScaleShiftOp +from mo.utils.utils import NamedAttrsClass + + +class ScaleFrontExtractor(FrontExtractorOp): + op = 'scale' + enabled = True + + @classmethod + def extract(cls, node): + pb = node.pb + model = node.model_pb + param = pb.scale_param + attrs = { + 'axis': param.axis, + } + + if model is None and len(pb.bottom) == 1: + # default weights and biases for scale layer if the caffemodel file doesn't contain them + model = NamedAttrsClass({'blobs': np.array([NamedAttrsClass({'data': np.array([1])}), + NamedAttrsClass({'data': np.array([0])})])}) + # scale with 1 input and 1 or 2 blobs + if model and len(model.blobs) != 0 and len(pb.bottom) == 1: + attrs.update(weights_biases(param.bias_term, model)) + # 2 inputs + bias + elif len(pb.bottom) == 2 and param.bias_term: + if model is None or len(model.blobs) == 0: + # default bias for scale layer with 2 inputs if the caffemodel file doesn't contain them + model = NamedAttrsClass({'blobs': np.array([NamedAttrsClass({'data': np.array([0])})])}) + + embed_input(attrs, 1, 'biases', model.blobs[0].data) + ScaleShiftOp.update_node_stat(node, attrs) + return cls.enabled + diff --git a/model-optimizer/extensions/front/instance_normalization.py b/model-optimizer/extensions/front/instance_normalization.py index 8a22b7b19038a0..d4f9917e832a27 100644 --- a/model-optimizer/extensions/front/instance_normalization.py +++ b/model-optimizer/extensions/front/instance_normalization.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -16,8 +16,14 @@ from extensions.ops.elementwise import Add, Mul from extensions.ops.mvn import MVN +from extensions.ops.range import Range +from extensions.ops.rank import Rank +from mo.front.common.partial_infer.utils import int64_array from mo.front.common.replacement import FrontReplacementOp -from mo.graph.graph import Node, Graph +from mo.front.tf.graph_utils import create_op_with_const_inputs +from mo.graph.graph import Node, Graph, rename_nodes + +import numpy as np class InstanceNormalization(FrontReplacementOp): @@ -29,18 +35,26 @@ class InstanceNormalization(FrontReplacementOp): enabled = True def replace_op(self, graph: Graph, node: Node): - - # Add new nodes - mvn = MVN(graph, {'eps': node.epsilon, 'name': node.name + '/Ins_Norm/MVN_', }).create_node() - mul = Mul(graph, {'axis': 1, 'name': node.name + '/Ins_Norm/mul_'}).create_node() - add = Add(graph, {'axis': 1, 'name': node.name + '/Ins_Norm/add_'}).create_node() - - # Connect nodes + name = node.soft_get('name', node.id) + + # create range of axes for MVN based on `start_axis` and rank of input + rank = Rank(graph, {'name': name + '/Rank'}).create_node() + rng = create_op_with_const_inputs(graph, Range, {0: int64_array(2), 2: int64_array(1)}, + {'name': name + '/Range', 'output_type': np.int64}) + mvn = MVN(graph, {'eps': node.epsilon, 'eps_mode': 'inside_sqrt', 'normalize_variance': 1, + 'name': name + '/Ins_Norm/MVN_', }).create_node() node.in_port(0).get_connection().set_destination(mvn.in_port(0)) + rng.out_port(0).connect(mvn.in_port(1)) + mul = Mul(graph, {'axis': 1, 'name': name + '/Ins_Norm/mul_'}).create_node() + mvn.out_port(0).connect(mul.in_port(0)) node.in_port(1).get_connection().set_destination(mul.in_port(1)) + add = Add(graph, {'axis': 1, 'name': name + '/Ins_Norm/add_'}).create_node() + mul.out_port(0).connect(add.in_port(0)) node.in_port(2).get_connection().set_destination(add.in_port(1)) - mvn.out_port(0).connect(mul.in_port(0)) - mul.out_port(0).connect(add.in_port(0)) + mvn.in_port(0).get_connection().add_destination(rank.in_port(0)) + rng.in_port(1).connect(rank.out_port(0)) + + rename_nodes([(node, name + '/TBD'), (add, name)]) return [add.id] diff --git a/model-optimizer/extensions/front/instance_normalization_test.py b/model-optimizer/extensions/front/instance_normalization_test.py index 26814a287a9903..91ad4a5646b258 100644 --- a/model-optimizer/extensions/front/instance_normalization_test.py +++ b/model-optimizer/extensions/front/instance_normalization_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
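A rough NumPy equivalent of the subgraph the rewritten InstanceNormalization replace_op builds above (a sketch only; it assumes an N, C, ... layout with per-channel scale and B, which are assumptions of this illustration rather than guarantees of the transformation):

```python
import numpy as np

def instance_norm_ref(x, scale, bias, epsilon):
    axes = tuple(range(2, x.ndim))              # Range(2, Rank(x), 1) -> spatial axes
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    mvn = (x - mean) / np.sqrt(var + epsilon)   # eps_mode == 'inside_sqrt'
    shape = (1, -1) + (1,) * (x.ndim - 2)       # broadcast per-channel scale/bias
    return mvn * scale.reshape(shape) + bias.reshape(shape)
```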
@@ -16,10 +16,8 @@ import unittest -import networkx as nx - from extensions.front.instance_normalization import InstanceNormalization -from mo.middle.pattern_match import node_match +from mo.utils.ir_engine.compare_graphs import compare_graphs from mo.utils.unittest.graph import build_graph nodes_attributes = { @@ -27,15 +25,21 @@ 'scale': {'kind': 'op', 'op': 'AnyOp'}, 'B': {'kind': 'op', 'op': 'AnyOp'}, 'node': {'kind': 'op', 'op': 'InstanceNormalization', 'epsilon': None}, + 'out': {'kind': 'op', 'op': 'AnyOp'}, } nodes_ref_attributes = { - 'input': {'op': 'AnyOp'}, - 'scale': {'op': 'AnyOp'}, - 'B': {'op': 'AnyOp'}, + 'input': {'kind': 'op', 'op': 'AnyOp'}, + 'scale': {'kind': 'op', 'op': 'AnyOp'}, + 'B': {'kind': 'op', 'op': 'AnyOp'}, + 'start': {'kind': 'op', 'op': 'Const'}, + 'step': {'kind': 'op', 'op': 'Const'}, + 'rank': {'kind': 'op', 'op': 'Rank'}, + 'mvn_axes': {'kind': 'op', 'op': 'Range'}, 'mvn': {'kind': 'op', 'op': 'MVN', 'name': 'node/Ins_Norm/MVN_', 'eps': None}, 'mul': {'kind': 'op', 'op': 'Mul', 'name': 'node/Ins_Norm/mul_'}, 'add': {'kind': 'op', 'op': 'Add', 'name': 'node/Ins_Norm/add_'}, + 'out': {'kind': 'op', 'op': 'AnyOp'}, } @@ -45,18 +49,61 @@ def test_instance_normalization_test_1(self): [('input', 'node'), ('scale', 'node'), ('B', 'node'), + ('node', 'out') + ], + {'node': {'epsilon': 0.123}, + }, nodes_with_edges_only=True) + + graph_ref = build_graph(nodes_ref_attributes, + [('input', 'mvn', {'out': 0}), + ('input', 'rank', {'out': 0}), + ('start', 'mvn_axes'), + ('rank', 'mvn_axes'), + ('step', 'mvn_axes'), + ('mvn_axes', 'mvn'), + ('mvn', 'mul'), + ('scale', 'mul'), + ('mul', 'add'), + ('B', 'add'), + ('add', 'out') + ], + {'mvn': {'eps': 0.123, 'eps_mode': 'inside_sqrt', 'normalize_variance': 1}, + }, nodes_with_edges_only=True) + + graph.stage = 'front' + + tested_class = InstanceNormalization() + tested_class.find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=False) + self.assertTrue(flag, resp) + + def test_instance_normalization_test_2(self): + graph = build_graph(nodes_attributes, + [('input', 'out', {'out': 0, 'in': 0}), + ('input', 'node', {'out': 1}), + ('scale', 'node'), + ('B', 'node'), + ('node', 'out', {'in': 1}) ], {'node': {'epsilon': 0.123}, }, nodes_with_edges_only=True) - ref_graph = build_graph(nodes_ref_attributes, - [('input', 'mvn'), + graph_ref = build_graph(nodes_ref_attributes, + [('input', 'out', {'out': 0, 'in': 0}), + ('input', 'mvn', {'out': 1}), + ('input', 'rank', {'out': 1}), + ('start', 'mvn_axes'), + ('rank', 'mvn_axes'), + ('step', 'mvn_axes'), + ('mvn_axes', 'mvn'), ('mvn', 'mul'), ('scale', 'mul'), ('mul', 'add'), ('B', 'add'), + ('add', 'out', {'in': 1}) ], - {'mvn': {'eps': 0.123}, + {'mvn': {'eps': 0.123, 'eps_mode': 'inside_sqrt', 'normalize_variance': 1}, }, nodes_with_edges_only=True) graph.stage = 'front' @@ -64,4 +111,5 @@ def test_instance_normalization_test_1(self): tested_class = InstanceNormalization() tested_class.find_and_replace_pattern(graph) - self.assertTrue(nx.is_isomorphic(graph, ref_graph, node_match)) + (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=False) + self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/mxnet/pooling_ext.py b/model-optimizer/extensions/front/mxnet/pooling_ext.py index feae85da36004c..54b9a7be00a5b2 100644 --- a/model-optimizer/extensions/front/mxnet/pooling_ext.py +++ b/model-optimizer/extensions/front/mxnet/pooling_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel 
Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ def extract(cls, node): 'pad': np.array([[0, 0], [0, 0], *[[pad, pad] for pad in padding]], dtype=np.int64), 'pad_spatial_shape': np.array([[pad, pad] for pad in padding], dtype=np.int64), 'pool_method': method, - 'exclude_pad': 'false', + 'exclude_pad': False, 'output_spatial_shape': None, 'spatial_dims': None, 'channel_dims': np.array([1], dtype=np.int64), diff --git a/model-optimizer/extensions/front/mxnet/pooling_ext_test.py b/model-optimizer/extensions/front/mxnet/pooling_ext_test.py index 17258cc0b18b2a..7212a27f232ae3 100644 --- a/model-optimizer/extensions/front/mxnet/pooling_ext_test.py +++ b/model-optimizer/extensions/front/mxnet/pooling_ext_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ def test_conv_ext_ideal_numbers(self): 'stride': np.array([1, 1, 3, 2]), 'window': np.array([1, 1, 3, 4]), 'pool_method': 'max', - 'exclude_pad': 'false', + 'exclude_pad': False, } for key in exp_res.keys(): diff --git a/model-optimizer/extensions/front/mxnet/take_ext.py b/model-optimizer/extensions/front/mxnet/take_ext.py new file mode 100644 index 00000000000000..590f0a3d68633d --- /dev/null +++ b/model-optimizer/extensions/front/mxnet/take_ext.py @@ -0,0 +1,33 @@ +""" + Copyright (C) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from extensions.ops.gather import AttributedGather +from mo.front.extractor import FrontExtractorOp +from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs +from mo.graph.graph import Node + + +class TakeExtractor(FrontExtractorOp): + op = 'take' + enabled = True + + @classmethod + def extract(cls, node: Node): + attrs = get_mxnet_layer_attrs(node.symbol_dict) + AttributedGather.update_node_stat(node, { + 'axis': attrs.int('axis', 0), + }) + return cls.enabled diff --git a/model-optimizer/extensions/front/onnx/loop_ext.py b/model-optimizer/extensions/front/onnx/loop_ext.py index b55c6605483588..6a2168df4eb4b8 100644 --- a/model-optimizer/extensions/front/onnx/loop_ext.py +++ b/model-optimizer/extensions/front/onnx/loop_ext.py @@ -28,30 +28,6 @@ from mo.utils.error import Error -def connect_body_output(loop_node: Node, loop_output_port_idx: int, internal_result: Node, axis: [int, None] = None, - start: [int, None] = None, end: [int, None] = None, stride: [int, None] = None, - part_size: [int, None] = None): - assert loop_node.soft_get('op') == 'Loop' - assert internal_result.soft_get('op') == 'Result' - assert internal_result.id in loop_node.body - - loop_node.output_port_map.append({'axis': axis, 'stride': stride, 'part_size': part_size, 'start': start, - 'end': end, 'external_port_id': loop_output_port_idx, - 'internal_layer_id': internal_result['internal_layer_id']}) - - -def connect_body_input(loop_node: Node, loop_input_port_idx: int, body_parameter: Node, - axis: [int, None] = None, start: [int, None] = None, end: [int, None] = None, - stride: [int, None] = None, part_size: [int, None] = None): - assert loop_node.soft_get('op') == 'Loop' - assert body_parameter.soft_get('op') == 'Parameter' - assert body_parameter.id in loop_node.body - - loop_node.input_port_map.append({'axis': axis, 'stride': stride, 'part_size': part_size, 'start': start, - 'end': end, 'external_port_id': loop_input_port_idx, - 'internal_layer_id': body_parameter['internal_layer_id']}) - - class LoopExtractor(FrontExtractorOp): op = 'Loop' enabled = True @@ -177,14 +153,14 @@ def extract(cls, loop_node): 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info']} ) - connect_body_input(loop_node, next_loop_input_port_idx, body_node) + Loop.connect_body_input(loop_node, next_loop_input_port_idx, body_node) next_loop_input_port_idx += 1 # mark current iteration input Parameter node Loop.mark_current_iteration_parameter_node(loop_node, body_parameters[0]) # connect initial value for "execution condition" input of the loop - connect_body_input(loop_node, 1, body_parameters[1]) + Loop.connect_body_input(loop_node, 1, body_parameters[1]) # add back edge with "execution condition" Loop.add_back_edge(loop_node, body_parameters[1], body_results[0]) # mark "execution condition" Result node @@ -192,17 +168,17 @@ def extract(cls, loop_node): # connect initial value for "loop carried" dependencies variables for idx in range(loop_carried_dependencies_count): - connect_body_input(loop_node, idx + 2, body_parameters[idx + 2]) + Loop.connect_body_input(loop_node, idx + 2, body_parameters[idx + 2]) # add back edge for "loop carried" dependencies variables for idx in range(loop_carried_dependencies_count): Loop.add_back_edge(loop_node, body_parameters[idx + 2], body_results[idx + 1]) # connect final value for "loop carried" dependencies variables for idx in range(loop_carried_dependencies_count): - connect_body_output(loop_node, idx, body_results[idx + 1]) + Loop.connect_body_output(loop_node, idx, 
body_results[idx + 1]) # connect "scan outputs" and mark axis for concatenation for idx in range(loop_carried_dependencies_count, loop_carried_dependencies_count + scan_outputs_count): - connect_body_output(loop_node, idx, body_results[idx + 1], axis=0) + Loop.connect_body_output(loop_node, idx, body_results[idx + 1], axis=0) # run function to parse body nodes attributes similar to the main graph extract_node_attrs(body_graph, lambda node: onnx_op_extractor(node, check_for_duplicates(onnx_op_extractors))) diff --git a/model-optimizer/extensions/front/onnx/mean_variance_normalization_ext.py b/model-optimizer/extensions/front/onnx/mean_variance_normalization_ext.py index 5a0c53f8d87015..a4d5f286490d0a 100644 --- a/model-optimizer/extensions/front/onnx/mean_variance_normalization_ext.py +++ b/model-optimizer/extensions/front/onnx/mean_variance_normalization_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from extensions.ops.mvn import MVN from mo.front.extractor import FrontExtractorOp from mo.front.onnx.extractors.utils import onnx_attr -from mo.utils.error import Error +from mo.ops.const import Const class MeanVarianceNormalizationExtractor(FrontExtractorOp): @@ -33,18 +33,15 @@ def extract(cls, node): default=np.array([0, 2, 3], dtype=np.int64), dst_type=lambda x: np.array(x, dtype=np.int64)) - if 0 in axes: - raise Error('Reduction over the batch dimension in node "{}" is not supported by the backend.'.format(name)) - # Dimension 4 (if it's present in the input tensor) should also be in the list of axes for reduction. - # This case will be handled at the MVN Op side, because input shape is not available at that stage. - for i in (2, 3): - if i not in axes: - raise Error('Reduction over spatial dimensions in node "{}" is obligatory for a backend.'.format(name)) + axes = Const(node.graph, {'value': axes, 'name': name + '/Axes'}).create_node() + node.add_input_port(1, skip_if_exist=True) + node.in_port(1).connect(axes.out_port(0)) attrs = { 'eps': 1e-9, 'normalize_variance': 1, - 'axes': axes + 'eps_mode': 'outside_sqrt' } + MVN.update_node_stat(node, attrs) return cls.enabled diff --git a/model-optimizer/extensions/front/onnx/pooling_ext.py b/model-optimizer/extensions/front/onnx/pooling_ext.py index 3e41e664983520..40abbfeeda70b6 100644 --- a/model-optimizer/extensions/front/onnx/pooling_ext.py +++ b/model-optimizer/extensions/front/onnx/pooling_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -112,7 +112,7 @@ def common_onnx_pool_extractor(node): # exclude_pad = True only when count_include_pad == 0 exclude_pad = onnx_attr(node, 'count_include_pad', 'i', default=0) == 0 - global_pooling = 0 + global_pooling = False if node.op in ['MaxPool', 'GlobalMaxPool']: method = 'max' elif node.op in ['AveragePool', 'GlobalAveragePool']: @@ -136,7 +136,7 @@ def common_onnx_pool_extractor(node): 'pad': final_pads, 'pad_spatial_shape': np.array(pads, dtype=np.int64) if pads is not None else None, 'pool_method': method, - 'exclude_pad': 'true' if exclude_pad else 'false', + 'exclude_pad': True if exclude_pad else False, 'global_pool': global_pooling, 'output_spatial_shape': None, 'rounding_type': rt, diff --git a/model-optimizer/extensions/front/tf/activation_ext.py b/model-optimizer/extensions/front/tf/activation_ext.py index 83492f8754edad..d8a856260b1f7b 100644 --- a/model-optimizer/extensions/front/tf/activation_ext.py +++ b/model-optimizer/extensions/front/tf/activation_ext.py @@ -14,7 +14,7 @@ limitations under the License. """ from extensions.ops.activation_ops import Abs, Elu, Erf, Exp, ReLU, LeakyReLU, LogicalNot, ReLU6, Sigmoid, \ - Sin, Sinh, Cos, Cosh, Tan, Tanh, Ceiling, Atanh, Acosh, Asinh, Mish + Sin, Sinh, Cos, Cosh, Tan, Tanh, Ceiling, Atanh, Acosh, Asinh, Mish, Log from mo.front.extractor import FrontExtractorOp @@ -220,3 +220,13 @@ class MishExtractor(FrontExtractorOp): def extract(cls, node): Mish.update_node_stat(node) return cls.enabled + + +class LogExtractor(FrontExtractorOp): + op = 'Log' + enabled = True + + @classmethod + def extract(cls, node): + Log.update_node_stat(node) + return cls.enabled diff --git a/model-optimizer/extensions/front/tf/mvn.py b/model-optimizer/extensions/front/tf/mvn.py index edd401fe702939..003d23939d498d 100644 --- a/model-optimizer/extensions/front/tf/mvn.py +++ b/model-optimizer/extensions/front/tf/mvn.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2017-2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -59,7 +59,8 @@ def replace_sub_graph(self, graph: Graph, match: dict): mvn = MVN(graph, dict( name=fbn.name + '/MVN_', eps=fbn.eps, - required_reduction_indices=[1, 2] if fbn.data_format == b'NHWC' else [2, 3] + eps_mode='outside_sqrt', + normalize_variance=1 )) mvn.attrs['old_infer'] = mvn.attrs['infer'] mvn.attrs['infer'] = __class__.infer @@ -84,19 +85,20 @@ def replace_sub_graph(self, graph: Graph, match: dict): @staticmethod def infer(node: Node): - if not (node.in_node(1).has_valid('value') and node.in_node(2).has_valid('value')): + axes_1_value = node.in_port(1).data.get_value() + axes_2_value = node.in_port(2).data.get_value() + if axes_1_value is None or axes_2_value is None: log.warning('Reduction indices for mean and variance for MVN node {} are not constants'.format(node.name)) return - if not (all(node.in_node(1).value == node.required_reduction_indices) and - all(node.in_node(2).value == node.required_reduction_indices)): - log.warning('Reduction indices for mean {} and variance {} do not match required ones {}'.format( - node.in_node(1).value, - node.in_node(2).value, - node.required_reduction_indices + if not (all(axes_1_value == axes_2_value)): + log.warning('Reduction indices for mean {} and variance {} do not match'.format( + axes_1_value, + axes_2_value )) return - node.graph.remove_edge(node.in_node(2).id, node.id) - node.graph.remove_edge(node.in_node(1).id, node.id) + node.in_port(2).disconnect() node.old_infer(node) + node.infer = node.old_infer + del node['old_infer'] diff --git a/model-optimizer/extensions/front/tf/mvn_unrolled.py b/model-optimizer/extensions/front/tf/mvn_unrolled.py index 49b403a542e6ea..78229f991d8cb0 100644 --- a/model-optimizer/extensions/front/tf/mvn_unrolled.py +++ b/model-optimizer/extensions/front/tf/mvn_unrolled.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2017-2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
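The MVN nodes created in this patch use two different eps_mode values ('inside_sqrt' for the LayerNorm, InstanceNormalization and Caffe conversions; 'outside_sqrt' for the ONNX MVN extractor and the TF fused-batch-norm based replacements above). A minimal NumPy sketch of the distinction, assuming normalize_variance is enabled (illustrative only, not part of the patch):

```python
import numpy as np

def mvn_ref(x, axes, eps, eps_mode):
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    if eps_mode == 'inside_sqrt':
        return (x - mean) / np.sqrt(var + eps)   # eps added before the square root
    return (x - mean) / (np.sqrt(var) + eps)     # 'outside_sqrt': eps added after
```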
@@ -56,7 +56,8 @@ def pattern(self): def replace_sub_graph(graph: Graph, match: dict): mvn = MVN(graph, dict( name=match['truediv'].name + '/MVN_', - required_reduction_indices=[1, 2] if graph.graph['layout'] == 'NHWC' else [2, 3] + eps_mode='outside_sqrt', + normalize_variance=1 )) mvn.attrs['old_infer'] = mvn.attrs['infer'] mvn.attrs['infer'] = __class__.infer @@ -72,29 +73,33 @@ def replace_sub_graph(graph: Graph, match: dict): @staticmethod def infer(node: Node): - if not (node.in_node(1).has_valid('value') and node.in_node(2).has_valid('value')): + axes_1_value = node.in_port(1).data.get_value() + axes_2_value = node.in_port(2).data.get_value() + if axes_1_value is None or axes_2_value is None: log.warning('Reduction indices for mean and variance for MVN node {} are not constants'.format(node.name)) return - if not (all(node.in_node(1).value == node.required_reduction_indices) and - all(node.in_node(2).value == node.required_reduction_indices)): - log.warning('Reduction indices for mean {} and variance {} do not match required ones {}'.format( - node.in_node(1).value, - node.in_node(2).value, - node.required_reduction_indices + if not (all(axes_1_value == axes_2_value)): + log.warning('Reduction indices for mean {} and variance {} do not match'.format( + axes_1_value, + axes_2_value )) return - - if not (node.in_node(3).has_valid('value') and node.in_node(4).has_valid('value')): + + power_value = node.in_port(3).data.get_value() + eps_value = node.in_port(4).data.get_value() + if power_value is None or eps_value is None: log.warning('Power or/and epsilon values for MVN node {} are not constants'.format(node.name)) return - if node.in_node(3).value != 0.5: - log.warning('Power for MVN node {} ({}) is not equal to 0.5'.format(node.name, node.in_node(3).value)) + if power_value != 0.5: + log.warning('Power for MVN node {} ({}) is not equal to 0.5'.format(node.name, power_value)) return - node['eps'] = node.in_node(4).value + node['eps'] = eps_value - for i in range(1, 5): - node.graph.remove_edge(node.in_node(i).id, node.id) + for i in range(2, 5): + node.in_port(i).disconnect() node.old_infer(node) + node.infer = node.old_infer + del node['old_infer'] diff --git a/model-optimizer/extensions/front/tf/pooling_ext.py b/model-optimizer/extensions/front/tf/pooling_ext.py index 028ed2fd5699ea..9606990d49d005 100644 --- a/model-optimizer/extensions/front/tf/pooling_ext.py +++ b/model-optimizer/extensions/front/tf/pooling_ext.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -86,6 +86,6 @@ def create_pooling_attrs(node, pool_method): 'pool_method': pool_method, 'type': 'Pooling', 'layout': data_format.s.decode(), - 'exclude_pad': 'true', + 'exclude_pad': True, } return attrs \ No newline at end of file diff --git a/model-optimizer/extensions/front/tf/pooling_ext_test.py b/model-optimizer/extensions/front/tf/pooling_ext_test.py index bcab0ed87f18f7..9f74a74fa7e8bf 100644 --- a/model-optimizer/extensions/front/tf/pooling_ext_test.py +++ b/model-optimizer/extensions/front/tf/pooling_ext_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -48,7 +48,7 @@ def test_pool_defaults(self): 'pad': None, # will be inferred when input shape is known 'pad_spatial_shape': None, 'type': 'Pooling', - 'exclude_pad': 'true', + 'exclude_pad': True, } node = PB({'pb': pb}) AvgPoolFrontExtractor.extract(node) diff --git a/model-optimizer/extensions/front/tf/transposed_mvn_unrolled.py b/model-optimizer/extensions/front/tf/transposed_mvn_unrolled.py index 5567335da1bcb1..6f7c895169fc38 100644 --- a/model-optimizer/extensions/front/tf/transposed_mvn_unrolled.py +++ b/model-optimizer/extensions/front/tf/transposed_mvn_unrolled.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ from extensions.ops.transpose import Transpose from mo.front.common.partial_infer.utils import int64_array from mo.front.common.replacement import FrontReplacementSubgraph -from mo.front.tf.graph_utils import create_op_node_with_second_input +from mo.front.tf.graph_utils import create_op_node_with_second_input, create_op_with_const_inputs from mo.graph.graph import Graph from mo.ops.reshape import Reshape from mo.ops.shape import Shape @@ -173,8 +173,10 @@ def replace_sub_graph(self, graph: Graph, match: dict): variance = match['variance'] eps_port_num = 0 if add.in_port(0).get_connection().get_source().node.id != variance.id else 1 eps = add.in_port(eps_port_num).get_connection().get_source().node - mvn_node = MVN(graph, dict(name=div_name + '/MVN/MVN_T_', required_reduction_indices=[1, 2, 3], - eps=eps.value)).create_node() + mvn_node = create_op_with_const_inputs(graph, MVN, {1: int64_array([1, 2, 3])}, + dict(name=div_name + '/MVN/MVN_T_', + eps=eps.value, normalize_variance=1, + eps_mode='inside_sqrt')) first_permute.out_port(0).connect(mvn_node.in_port(0)) second_permute = create_op_node_with_second_input(graph, Transpose, permute_order, diff --git a/model-optimizer/extensions/front/tf/while_ext.py b/model-optimizer/extensions/front/tf/while_ext.py index 52f7defad3d152..bb29379e05c24b 100644 --- a/model-optimizer/extensions/front/tf/while_ext.py +++ b/model-optimizer/extensions/front/tf/while_ext.py @@ -15,7 +15,6 @@ """ import copy -from extensions.front.onnx.loop_ext import connect_body_input, connect_body_output from extensions.ops.loop import Loop from extensions.ops.parameter import Parameter from mo.front.common.register_custom_ops import check_for_duplicates @@ -174,7 +173,7 @@ def extract(cls, loop_node): # connect external input ports with body parameter nodes except current iteration # since it must be disconnected from external port for idx in range(1, len(body_parameters)): - connect_body_input(loop_node, idx, body_parameters[idx]) + Loop.connect_body_input(loop_node, idx, body_parameters[idx]) # mark current iteration input Parameter node and execution condition Result node Loop.mark_current_iteration_parameter_node(loop_node, body_parameters[0]) @@ -186,7 +185,7 @@ def extract(cls, loop_node): # connect body outputs with Loop operation output ports except the execution condition result for idx in range(len(body_results)-1): - connect_body_output(loop_node, idx, body_results[idx]) + Loop.connect_body_output(loop_node, idx, body_results[idx]) # run function to parse body nodes attributes similar to the main graph extract_node_attrs(body_graph, lambda node: tf_op_extractor(node, check_for_duplicates(tf_op_extractors))) diff --git 
a/model-optimizer/extensions/middle/FusedBatchNormTraining.py b/model-optimizer/extensions/middle/FusedBatchNormTraining.py index 3640a59c8e34dd..586c38a38927fa 100644 --- a/model-optimizer/extensions/middle/FusedBatchNormTraining.py +++ b/model-optimizer/extensions/middle/FusedBatchNormTraining.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,8 +17,9 @@ import numpy as np from extensions.ops.mvn import MVN +from extensions.ops.range import Range from mo.front.common.partial_infer.utils import int64_array -from mo.front.tf.graph_utils import create_op_node_with_second_input +from mo.front.tf.graph_utils import create_op_node_with_second_input, create_op_with_const_inputs from mo.graph.graph import Graph from mo.middle.replacement import MiddleReplacementPattern from mo.ops.const import Const @@ -45,20 +46,21 @@ def pattern(self): return dict( nodes=[ ('op', dict(kind='op', op=lambda op: op in ['FusedBatchNorm', 'FusedBatchNormV2', 'FusedBatchNormV3'], - is_training=True))], + is_training=True))], edges=[] ) def replace_pattern(self, graph: Graph, match: dict): node = match['op'] + node_name = node.soft_get('name', node.id) node.is_training = False shape = node.in_port(1).data.get_shape() assert shape is not None, 'The shape of scale input of the BatchNorm node {} is not defined'.format(node.name) - bn_mean = Const(graph, {'name': node.name + '/mean', 'value': np.zeros(shape, dtype=np.float32), + bn_mean = Const(graph, {'name': node_name + '/mean', 'value': np.zeros(shape, dtype=np.float32), 'override_output_shape': True}).create_node() - bn_std = Const(graph, {'name': node.name + '/std', 'value': np.ones(shape, dtype=np.float32), + bn_std = Const(graph, {'name': node_name + '/std', 'value': np.ones(shape, dtype=np.float32), 'override_output_shape': True}).create_node() node.in_port(3).get_connection().set_source(bn_mean.out_port(0)) node.in_port(4).get_connection().set_source(bn_std.out_port(0)) @@ -67,17 +69,22 @@ def replace_pattern(self, graph: Graph, match: dict): original_shape = Shape(graph, {'name': node.in_port(0).get_source().node.soft_get('name')}).create_node() original_shape.in_port(0).connect(node.in_port(0).get_source()) - mvn = MVN(graph, {'name': node.name + '/mvn_', 'eps': node.soft_get('eps', 1e-6), - 'override_output_shape': True}).create_node() + input_rank = len(node.in_port(0).data.get_shape()) + rng = create_op_with_const_inputs(graph, Range, + {0: int64_array(2), 1: int64_array(input_rank), 2: int64_array(1)}, + {'name': node_name + '/Range', 'output_type': np.int64}) + mvn = MVN(graph, {'name': node_name + '/mvn_', 'eps': node.soft_get('eps', 1e-6), 'eps_mode': 'outside_sqrt', + 'normalize_variance': 1, 'override_output_shape': True}).create_node() node.in_port(0).get_connection().insert_node(mvn) + mvn.in_port(1).connect(rng.out_port(0)) reshape_4d = create_op_node_with_second_input(graph, Reshape, int64_array([1, -1, 0, 0]), {'override_output_shape': True, - 'name': node.soft_get('name') + '/fused_batch_and_channels'}) + 'name': node_name + '/fused_batch_and_channels'}) mvn.in_port(0).get_connection().insert_node(reshape_4d) # restore original shape - reshape_back = Reshape(graph, {'name': mvn.soft_get('name') + '/restore_shape', + reshape_back = Reshape(graph, {'name': node_name + '/restore_shape', 'override_output_shape': True}).create_node() 
reshape_back.in_port(1).connect(original_shape.out_port(0)) mvn.out_port(0).get_connection().insert_node(reshape_back) diff --git a/model-optimizer/extensions/middle/FusedBatchNormTraining_test.py b/model-optimizer/extensions/middle/FusedBatchNormTraining_test.py index 6f8e39906794fa..9f5c52bbca3181 100644 --- a/model-optimizer/extensions/middle/FusedBatchNormTraining_test.py +++ b/model-optimizer/extensions/middle/FusedBatchNormTraining_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -65,6 +65,15 @@ 'reshape_to_orig': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'}, 'reshape_to_orig_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'start': {'kind': 'op', 'op': 'Const'}, + 'start_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'stop': {'kind': 'op', 'op': 'Const'}, + 'stop_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'step': {'kind': 'op', 'op': 'Const'}, + 'step_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'mvn_axes': {'kind': 'op', 'op': 'Range'}, + 'mvn_axes_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'mvn': {'type': 'MVN', 'value': None, 'kind': 'op', 'op': 'MVN', 'eps': 1e-3}, 'mvn_data': {'value': None, 'shape': None, 'kind': 'data'}, @@ -115,6 +124,14 @@ def test_transformation(self, op: str): ('reshape_1_data', 'mvn', {'in': 0}), ('mvn', 'mvn_data'), ('mvn_data', 'reshape_to_orig', {'in': 0}), + ('start', 'start_data'), + ('start_data', 'mvn_axes'), + ('stop', 'stop_data'), + ('stop_data', 'mvn_axes'), + ('step', 'step_data'), + ('step_data', 'mvn_axes'), + ('mvn_axes', 'mvn_axes_data'), + ('mvn_axes_data', 'mvn'), ('placeholder_data', 'shapeof', {'in': 0}), ('shapeof', 'shapeof_data'), ('shapeof_data', 'reshape_to_orig', {'in': 1}), diff --git a/model-optimizer/extensions/middle/GroupNorm.py b/model-optimizer/extensions/middle/GroupNorm.py index 166fcd55859b00..7cf7c9f130de4f 100644 --- a/model-optimizer/extensions/middle/GroupNorm.py +++ b/model-optimizer/extensions/middle/GroupNorm.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
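A small sketch (my reading of the subgraph built by FusedBatchNormTraining above, not code from the patch) of what the new Range-driven axes input evaluates to and why it matches the reshape trick:

    import numpy as np

    # The transformation inserts Range(start=2, stop=input_rank, step=1) as the MVN axes input.
    input_rank = 4
    mvn_axes = np.arange(2, input_rank, 1, dtype=np.int64)
    assert mvn_axes.tolist() == [2, 3]

    # Because the preceding Reshape folds batch and channels into axis 1 via [1, -1, 0, 0],
    # normalizing over the remaining axes [2, 3] yields per-(batch, channel) statistics,
    # which is what the training-mode FusedBatchNorm computed.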
@@ -21,7 +21,9 @@ from extensions.ops.Cast import Cast from extensions.ops.elementwise import Mul, Add from extensions.ops.mvn import MVN +from extensions.ops.range import Range from mo.front.common.partial_infer.utils import int64_array +from mo.front.tf.graph_utils import create_op_with_const_inputs from mo.graph.graph import Graph, Node from mo.middle.passes.convert_data_type import data_type_str_to_np from mo.middle.replacement import MiddleReplacementPattern @@ -29,7 +31,7 @@ from mo.ops.reshape import Reshape from mo.ops.shape import Shape from mo.utils.shape import node_to_get_spatial_dimensions_value, node_to_get_features_dimension_value, \ - node_to_get_batch_value, new_shape_node_from_shape_nodes + node_to_get_batch_value, new_shape_node_from_shape_nodes, get_shape_and_rank_nodes_by_port class GroupNormToMVN(MiddleReplacementPattern): @@ -115,11 +117,19 @@ def replace_pattern(self, graph: Graph, match: Dict[str, Node]): # MVN mvn_node = MVN(graph, {'name': group_norm_node.name + '/MVN', - 'across_channels': 1, 'normalize_variance': 1, - 'eps': group_norm_node.eps}).create_node() + 'eps': group_norm_node.eps, + 'eps_mode': 'inside_sqrt'}).create_node() mvn_node.in_port(0).connect(reshape_for_mvn_node.out_port(0)) + # MVN axes + _, rank = get_shape_and_rank_nodes_by_port(mvn_node.in_port(0), return_as_a_scalar=True) + rng = create_op_with_const_inputs(graph, Range, {0: int64_array(1), 2: int64_array(1)}, + {'name': group_norm_node.name + '/Range', 'output_type': np.int64}) + mvn_node.in_port(1).connect(rng.out_port(0)) + rng.in_port(1).connect(rank.out_port(0)) + mvn_node.in_port(0).get_connection().add_destination(rank.in_port(0)) + # reshape to the initial shape before multiplying with gamma and adding beta reshape_to_initial_shape_node = Reshape(graph, {}).create_node() reshape_to_initial_shape_node.in_port(0).connect(mvn_node.out_port(0)) diff --git a/model-optimizer/extensions/ops/BN.py b/model-optimizer/extensions/ops/BN.py new file mode 100644 index 00000000000000..2e3114d4fcb408 --- /dev/null +++ b/model-optimizer/extensions/ops/BN.py @@ -0,0 +1,35 @@ +""" + Copyright (C) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from mo.graph.graph import Graph +from mo.ops.op import Op + + +class BN(Op): + """ + BN operation comes from caffe and will be replaced by BNToScaleShift FrontReplacer. 
+ """ + op = 'BN' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'type': None, + 'op': self.op, + 'in_ports_count': 5, + 'out_ports_count': 1, + 'infer': None + }, attrs) diff --git a/model-optimizer/extensions/ops/DetectionOutput.py b/model-optimizer/extensions/ops/DetectionOutput.py index 9a7f52c72076a2..787a4b2d207949 100644 --- a/model-optimizer/extensions/ops/DetectionOutput.py +++ b/model-optimizer/extensions/ops/DetectionOutput.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ import numpy as np from mo.front.common.partial_infer.multi_box_detection import multi_box_detection_infer +from mo.front.extractor import bool_to_str from mo.graph.graph import Graph, Node from mo.ops.op import Op @@ -34,48 +35,32 @@ def __init__(self, graph: Graph, attrs: dict): 'infer': multi_box_detection_infer, 'input_width': 1, 'input_height': 1, - 'normalized': 1, - 'share_location': 1, - 'variance_encoded_in_target': 0, + 'normalized': True, + 'share_location': True, + 'clip_after_nms': False, + 'clip_before_nms': False, + 'decrease_label_id': False, + 'variance_encoded_in_target': False, 'type_infer': self.type_infer, }, attrs) def supported_attrs(self): return [ 'background_label_id', - 'clip_after_nms', - 'clip_before_nms', + ('clip_after_nms', lambda node: bool_to_str(node, 'clip_after_nms')), + ('clip_before_nms', lambda node: bool_to_str(node, 'clip_before_nms')), 'code_type', 'confidence_threshold', - 'decrease_label_id', - 'eta', - 'height', - 'height_scale', + ('decrease_label_id', lambda node: bool_to_str(node, 'decrease_label_id')), 'input_height', 'input_width', - 'interp_mode', 'keep_top_k', - 'label_map_file', - 'name_size_file', 'nms_threshold', - 'normalized', + ('normalized', lambda node: bool_to_str(node, 'normalized')), 'num_classes', - 'num_test_image', - 'output_directory', - 'output_format', - 'output_name_prefix', - 'pad_mode', - 'pad_value', - 'prob', - 'resize_mode', - 'save_file', - 'share_location', + ('share_location', lambda node: bool_to_str(node, 'share_location')), 'top_k', - 'variance_encoded_in_target', - 'visualize', - 'visualize_threshold', - 'width', - 'width_scale', + ('variance_encoded_in_target', lambda node: bool_to_str(node, 'variance_encoded_in_target')), 'objectness_score', ] diff --git a/model-optimizer/extensions/ops/GRUCell.py b/model-optimizer/extensions/ops/GRUCell.py index 67a48c4639d634..5ae69b339fdae3 100644 --- a/model-optimizer/extensions/ops/GRUCell.py +++ b/model-optimizer/extensions/ops/GRUCell.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ limitations under the License. 
""" from mo.front.common.partial_infer.utils import mark_input_bins +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op from mo.utils.error import Error @@ -39,13 +40,13 @@ def __init__(self, graph: Graph, attrs: dict): mandatory_props = { 'type': __class__.op, 'op': __class__.op, - 'version': 'experimental', 'infer': __class__.infer, 'in_ports_count': 4, 'out_ports_count': 1, 'version': 'opset3', 'wr_input_id': 2, - 'gates_count': 3 + 'gates_count': 3, + 'linear_before_reset': False, } super().__init__(graph, mandatory_props, attrs) @@ -66,7 +67,7 @@ def backend_attrs(self): 'activation_alpha', 'activation_beta', 'clip', - 'linear_before_reset', + ('linear_before_reset', lambda node: bool_to_str(node, 'linear_before_reset')), ] @staticmethod diff --git a/model-optimizer/extensions/ops/MatMul.py b/model-optimizer/extensions/ops/MatMul.py index 4b0f1b4d1cde7e..7181ddc33167ee 100644 --- a/model-optimizer/extensions/ops/MatMul.py +++ b/model-optimizer/extensions/ops/MatMul.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ import numpy as np from mo.front.common.partial_infer.utils import assign_dims_to_weights, int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -44,8 +45,8 @@ def __init__(self, graph: Graph, attrs: dict): def supported_attrs(self): return [ - 'transpose_a', - 'transpose_b', + ('transpose_a', lambda node: bool_to_str(node, 'transpose_a')), + ('transpose_b', lambda node: bool_to_str(node, 'transpose_b')), ] @staticmethod diff --git a/model-optimizer/extensions/ops/ReduceOps.py b/model-optimizer/extensions/ops/ReduceOps.py index 622b5eec3a3b13..3c8cc0714eca12 100644 --- a/model-optimizer/extensions/ops/ReduceOps.py +++ b/model-optimizer/extensions/ops/ReduceOps.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ import numpy as np from mo.front.common.partial_infer.utils import int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.graph.perm_inputs import PermuteInputs from mo.ops.op import Op @@ -106,7 +107,7 @@ def __init__(self, graph: Graph, attrs: dict): def supported_attrs(self): return [ - ('keep_dims', lambda node: str(node.keep_dims)), + ('keep_dims', lambda node: bool_to_str(node, 'keep_dims')), ] diff --git a/model-optimizer/extensions/ops/adaptive_avg_pooling.py b/model-optimizer/extensions/ops/adaptive_avg_pooling.py index 1d22a6fbc2c636..2e2460518b7ac7 100644 --- a/model-optimizer/extensions/ops/adaptive_avg_pooling.py +++ b/model-optimizer/extensions/ops/adaptive_avg_pooling.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -55,7 +55,7 @@ def infer(cls, node: Node): 'pad': int64_array([[0, 0], [0, 0], [0, 0], [0, 0]]), 'pad_spatial_shape': int64_array([[0, 0], [0, 0]]), 'pool_method': 'avg', - 'exclude_pad': 'false', + 'exclude_pad': False, 'output_spatial_shape': None, 'spatial_dims': None, 'channel_dims': int64_array([1]), diff --git a/model-optimizer/extensions/ops/bucketize.py b/model-optimizer/extensions/ops/bucketize.py index 850c1b9526f7e3..777a28b9c400d5 100644 --- a/model-optimizer/extensions/ops/bucketize.py +++ b/model-optimizer/extensions/ops/bucketize.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ import numpy as np +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.middle.passes.convert_data_type import np_data_type_to_destination_type from mo.ops.op import Op @@ -42,10 +43,10 @@ def __init__(self, graph: Graph, attrs: dict): def backend_attrs(self): version = self.get_opset() if version == "extension": - return ['with_right_bound'] + return [('with_right_bound', lambda node: bool_to_str(node, 'with_right_bound'))] else: return [ - 'with_right_bound', + ('with_right_bound', lambda node: bool_to_str(node, 'with_right_bound')), ('output_type', lambda node: np_data_type_to_destination_type(node.output_type)), ] diff --git a/model-optimizer/extensions/ops/ctc_greedy_decoder.py b/model-optimizer/extensions/ops/ctc_greedy_decoder.py index 74f523cc16e6d7..9d0fda38ae666d 100644 --- a/model-optimizer/extensions/ops/ctc_greedy_decoder.py +++ b/model-optimizer/extensions/ops/ctc_greedy_decoder.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ """ from mo.front.common.partial_infer.utils import int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -32,13 +33,15 @@ def __init__(self, graph: Graph, attrs: dict): 'reinterp_shape': True, 'in_ports_count': 2, - 'out_ports_count': 1 + 'out_ports_count': 1, + + 'ctc_merge_repeated': True } super().__init__(graph, mandatory_props, attrs) def supported_attrs(self): return [ - 'ctc_merge_repeated' + ('ctc_merge_repeated', lambda node: bool_to_str(node, 'ctc_merge_repeated')) ] @staticmethod diff --git a/model-optimizer/extensions/ops/ctc_loss.py b/model-optimizer/extensions/ops/ctc_loss.py index 6004c634fe052d..1a5bdce41aab75 100644 --- a/model-optimizer/extensions/ops/ctc_loss.py +++ b/model-optimizer/extensions/ops/ctc_loss.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2020 Intel Corporation + Copyright (C) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ import numpy as np from mo.front.common.partial_infer.utils import int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -35,11 +36,17 @@ def __init__(self, graph: Graph, attrs: dict): 'in_ports_count': 5, 'out_ports_count': 1, + + 'preprocess_collapse_repeated': False, + 'ctc_merge_repeated': True, + 'unique': False } super().__init__(graph, mandatory_props, attrs) def backend_attrs(self): - return ['preprocess_collapse_repeated', 'ctc_merge_repeated', 'unique'] + return [('preprocess_collapse_repeated', lambda node: bool_to_str(node, 'preprocess_collapse_repeated')), + ('ctc_merge_repeated', lambda node: bool_to_str(node, 'ctc_merge_repeated')), + ('unique', lambda node: bool_to_str(node, 'unique'))] @staticmethod def type_infer(node): diff --git a/model-optimizer/extensions/ops/cumsum.py b/model-optimizer/extensions/ops/cumsum.py index 6cc192ac90b016..f7a6dc130b5e6c 100644 --- a/model-optimizer/extensions/ops/cumsum.py +++ b/model-optimizer/extensions/ops/cumsum.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ """ import numpy as np +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -48,7 +49,8 @@ def __init__(self, graph: Graph, attrs: dict): }, attrs) def supported_attrs(self): - return ["exclusive", "reverse"] + return [('exclusive', lambda node: bool_to_str(node, 'exclusive')), + ('reverse', lambda node: bool_to_str(node, 'reverse'))] @staticmethod def infer(node: Node): diff --git a/model-optimizer/extensions/ops/interpolate.py b/model-optimizer/extensions/ops/interpolate.py index 95e62d20d5cb50..c0d807eb39fb5b 100644 --- a/model-optimizer/extensions/ops/interpolate.py +++ b/model-optimizer/extensions/ops/interpolate.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -16,9 +16,11 @@ import math + import numpy as np from mo.front.common.partial_infer.utils import int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op, PermuteAttrs @@ -116,11 +118,14 @@ def __init__(self, graph: Graph, attrs: dict): self.attributes_for_opsets = { 'opset1': [ ('axes', lambda node: ','.join(map(str, node.axes))), - 'mode', 'align_corners', 'antialias', 'pads_begin', 'pads_end', + ('antialias', lambda node: bool_to_str(node, 'antialias')), + ('align_corners', lambda node: bool_to_str(node, 'align_corners')), + 'mode', 'pads_begin', 'pads_end', ], 'opset4': [ - 'mode', 'antialias', 'nearest_mode', 'cube_coeff', 'coordinate_transformation_mode', + 'mode', 'nearest_mode', 'cube_coeff', 'coordinate_transformation_mode', 'shape_calculation_mode', + ('antialias', lambda node: bool_to_str(node, 'antialias')), ('pads_begin', lambda node: pad_attribute_to_str(node, 'pads_begin')), ('pads_end', lambda node: pad_attribute_to_str(node, 'pads_end')), ] diff --git a/model-optimizer/extensions/ops/loop.py b/model-optimizer/extensions/ops/loop.py index 39c2fb6ac18284..380282841555dc 100644 --- a/model-optimizer/extensions/ops/loop.py +++ b/model-optimizer/extensions/ops/loop.py @@ -20,7 +20,6 @@ from extensions.ops.tensor_iterator import TensorIterator from mo.front.common.partial_infer.utils import int64_array from mo.graph.graph import Node, Graph -from mo.graph.port import Port from mo.middle.passes.infer import partial_infer from mo.ops.const import Const @@ -95,9 +94,9 @@ def updated_body_parameters_shape(loop_node: Node): if loop_port_idx != -1: input_shape = loop_node.in_port(loop_port_idx).get_connection().get_source().data.get_shape() slice_axis = record['axis'] + body_node.shape = input_shape.copy() if slice_axis is not None: - input_shape[slice_axis] = 1 - body_node.shape = input_shape + body_node.shape[slice_axis] = 1 log.debug('Updated shape for the body node with internal_id "{}" with value {}' ''.format(record['internal_layer_id'], body_node.shape)) @@ -252,42 +251,53 @@ def external_port_id_to_body_node(loop_node: Node, external_port_id: int, port_m return result_nodes[0] @staticmethod - def connect_body_input(loop_input_port: Port, internal_parameter: Node, external_node_out_port: Port = None, + def connect_body_input(loop_node: Node, loop_input_port_idx: int, body_parameter: Node, axis: [int, None] = None, start: [int, None] = None, end: [int, None] = None, stride: [int, None] = None, part_size: [int, None] = None): - loop_node = loop_input_port.node - assert loop_node.soft_get('op') == 'Loop' - assert loop_input_port.type == 'in' - assert internal_parameter.soft_get('op') == 'Parameter' - assert internal_parameter.id in loop_node.body + """ + Update the input port map to connect the input port with the specified body parameter - if external_node_out_port is not None: - assert loop_input_port.disconnected() - assert external_node_out_port.node.id not in loop_node.body - loop_input_port.connect(external_node_out_port) + :param loop_node: the Loop node + :param loop_input_port_idx: the input port index to connect + :param body_parameter: the body parameter node to connect + :param axis: dimension for input slicing + :param start: start value of dimension from which to start slicing + :param end: end value of dimension when to finish slicing + :param stride: a step value for slicing + :param part_size: a partial size for slicing, i.e. 
slicing [start; start + part_size) + :return: None + """ + assert loop_node.soft_get('op') == 'Loop' + assert body_parameter.soft_get('op') == 'Parameter' + assert body_parameter.id in loop_node.body loop_node.input_port_map.append({'axis': axis, 'stride': stride, 'part_size': part_size, 'start': start, - 'end': end, 'external_port_id': loop_input_port.idx, - 'internal_layer_id': internal_parameter['internal_layer_id']}) + 'end': end, 'external_port_id': loop_input_port_idx, + 'internal_layer_id': body_parameter['internal_layer_id']}) @staticmethod - def connect_body_output(loop_output_port: Port, internal_result: Node, external_node_input_ports: list = None, - axis: [int, None] = None, start: [int, None] = None, end: [int, None] = None, - stride: [int, None] = None, part_size: [int, None] = None): - loop_node = loop_output_port.node + def connect_body_output(loop_node: Node, loop_output_port_idx: int, internal_result: Node, axis: [int, None] = None, + start: [int, None] = None, end: [int, None] = None, stride: [int, None] = None, + part_size: [int, None] = None): + """ + Update the output port map to connect the body Result node with the specified output port + + :param loop_node: the Loop node + :param loop_output_port_idx: the output port index to connect + :param internal_result: the body Result node to connect + :param axis: dimension for output concatenation + :param start: start value of dimension from which to start concatenation + :param end: end value of dimension when to finish concatenation + :param stride: a step value for concatenation + :param part_size: a partial size for concatenation, i.e. concatenation [start; start + part_size) + :return: None + """ assert loop_node.soft_get('op') == 'Loop' - assert loop_output_port.type == 'out' assert internal_result.soft_get('op') == 'Result' assert internal_result.id in loop_node.body - if external_node_input_ports is not None: - assert loop_output_port.disconnected() - assert all([port.node.id not in loop_node.body for port in external_node_input_ports]) - for port in external_node_input_ports: - port.disconnect() - loop_output_port.connect(port) loop_node.output_port_map.append({'axis': axis, 'stride': stride, 'part_size': part_size, 'start': start, - 'end': end, 'external_port_id': loop_output_port.idx, + 'end': end, 'external_port_id': loop_output_port_idx, 'internal_layer_id': internal_result['internal_layer_id']}) @staticmethod diff --git a/model-optimizer/extensions/ops/mvn.py b/model-optimizer/extensions/ops/mvn.py index a2ad9ff49ee1c1..bc58a44ec2cbc0 100644 --- a/model-optimizer/extensions/ops/mvn.py +++ b/model-optimizer/extensions/ops/mvn.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
""" +from mo.front.caffe.extractors.utils import get_canonical_axis_index from mo.front.common.layout import get_features_dim from mo.front.common.partial_infer.elemental import copy_shape_infer -from mo.front.caffe.extractors.utils import get_canonical_axis_index +from mo.front.extractor import bool_to_str from mo.graph.graph import Graph +from mo.graph.perm_inputs import PermuteInputs from mo.ops.op import Op from mo.utils.error import Error @@ -28,53 +30,58 @@ class MVN(Op): def __init__(self, graph: Graph, attrs: dict): super().__init__(graph, { 'kind': 'op', - 'type': __class__.op, - 'op': __class__.op, - 'version': 'opset2', + 'type': self.op, + 'op': self.op, + 'version': 'opset6', 'eps': None, - 'across_channels': None, - 'normalize_variance': 1, - 'axes': None, - 'in_ports_count': 1, + 'normalize_variance': None, + 'eps_mode': None, + 'in_ports_count': 2, 'out_ports_count': 1, - 'infer': __class__.infer + 'infer': self.infer }, attrs) def supported_attrs(self): - return ['eps', 'across_channels', 'normalize_variance', 'axes'] + return ['eps', 'eps_mode', 'normalize_variance'] def backend_attrs(self): - return ['eps', 'across_channels', 'normalize_variance'] + version = self.get_opset() + if version == 'opset2': + return ['eps', + ('across_channels', lambda node: bool_to_str(node, 'across_channels')), + ('normalize_variance', lambda node: bool_to_str(node, 'normalize_variance'))] + elif version == 'opset6': + return ['eps', 'eps_mode', ('normalize_variance', lambda node: bool_to_str(node, 'normalize_variance'))] + else: + raise Error('Unsupported MVN opset version "{}"'.format(version)) @staticmethod def infer(node: None): - input_shape = node.in_node(0).shape name = node.soft_get('name', node.id) - if node.axes is not None and node.across_channels is not None: - raise Error('Either axes or across_channels can be set for the MVN in node "{}".'.format(name)) + assert node.eps is not None, 'MVN required attribute `eps` unspecified for node {}'.format(name) + assert node.eps_mode is not None, 'MVN required attribute `eps_mode` unspecified for node {}'.format(name) + assert node.normalize_variance is not None, \ + 'MVN required attribute `normalize_variance` unspecified for node {}'.format(name) - if node.across_channels is None: - if node.axes is not None: - # normalizing (replacing -1 with actual index) - axes_data_value = node.axes - axes = [axes_data_value.item()] if axes_data_value.size == 1 else axes_data_value - axes = [get_canonical_axis_index(input_shape, a) for a in axes] - # deduce across_channels from the axes, e.g. 
if the first axis is included (assuming batch is zero axis) - feature_dim = get_features_dim(node.graph.graph['layout'], len(input_shape)) \ - if (4 <= len(input_shape) <= 5) \ - else 1 - node.across_channels = int(feature_dim in axes) + PermuteInputs().set_input_permutation(node.in_node(1), node, 'input:0', 'axis') + copy_shape_infer(node) - if 0 in axes: - raise Error('Reduction over the batch dimension in node "{}" ' - 'is not supported by the backend.'.format(name)) - for i in range(2, len(input_shape)): - if i not in axes: - raise Error( - 'Reduction over spatial dimensions in node "{}" ' - 'is obligatory for the backend.'.format(name)) - else: - node.across_channels = 0 # default - copy_shape_infer(node) +class MVNCaffe(Op): + op = 'MVNCaffe' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'kind': 'op', + 'type': None, + 'op': self.op, + 'version': None, + 'eps': 1e-9, + 'normalize_variance': 1, + 'across_channels': 0, + 'in_ports_count': 1, + 'out_ports_count': 1, + 'infer': None + }, attrs) diff --git a/model-optimizer/extensions/ops/non_max_suppression.py b/model-optimizer/extensions/ops/non_max_suppression.py index 6777bcfd3e5242..3273fea409f80b 100644 --- a/model-optimizer/extensions/ops/non_max_suppression.py +++ b/model-optimizer/extensions/ops/non_max_suppression.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ import numpy as np from mo.front.common.partial_infer.utils import int64_array +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.middle.passes.convert_data_type import np_data_type_to_destination_type from mo.ops.op import Op @@ -53,10 +54,12 @@ def __init__(self, graph: Graph, attrs: dict): def backend_attrs(self): version = self.get_opset() if version in ['opset3', 'opset4', 'opset5']: - return ['sort_result_descending', 'box_encoding', + return [('sort_result_descending', lambda node: bool_to_str(node, 'sort_result_descending')), + 'box_encoding', ('output_type', lambda node: np_data_type_to_destination_type(node.output_type))] elif version == 'opset1': - return ['sort_result_descending', 'box_encoding'] + return [('sort_result_descending', lambda node: bool_to_str(node, 'sort_result_descending')), + 'box_encoding'] else: raise Error('Unsupported operation opset version "{}"'.format(version)) diff --git a/model-optimizer/extensions/ops/priorbox.py b/model-optimizer/extensions/ops/priorbox.py index e21d0f66435749..50b4e54f4ec5a6 100644 --- a/model-optimizer/extensions/ops/priorbox.py +++ b/model-optimizer/extensions/ops/priorbox.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
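For readers mapping the old opset-2 MVN attributes onto the new opset-6 form above, a hedged sketch of how the legacy across_channels flag is assumed to translate into an explicit axes input (mirroring the Range subgraphs used by the transformations earlier in this patch; the correspondence is my reading, not patch code):

    # Assumed correspondence for a 4D NCHW tensor.
    rank = 4
    axes_across_channels = list(range(1, rank))  # [1, 2, 3]: normalize over C, H and W
    axes_per_channel = list(range(2, rank))      # [2, 3]:    normalize over H and W only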
@@ -17,7 +17,7 @@ import numpy as np from mo.front.common.layout import get_width_dim, get_height_dim -from mo.front.extractor import attr_getter +from mo.front.extractor import attr_getter, bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -30,7 +30,9 @@ def __init__(self, graph: Graph, attrs: dict): 'type': self.op, 'op': self.op, 'version': 'opset1', - 'flip': 1, + 'flip': True, + 'clip': True, + 'scale_all_sizes': True, 'max_size': np.array([]), 'min_size': np.array([]), 'aspect_ratio': np.array([]), @@ -66,11 +68,11 @@ def supported_attrs(self): def backend_attrs(self): return [ - 'flip', - 'clip', + ('flip', lambda node: bool_to_str(node, 'flip')), + ('clip', lambda node: bool_to_str(node, 'clip')), 'step', 'offset', - 'scale_all_sizes', + ('scale_all_sizes', lambda node: bool_to_str(node, 'scale_all_sizes')), ('min_size', lambda node: attr_getter(node, 'min_size')), ('max_size', lambda node: attr_getter(node, 'max_size')), ('aspect_ratio', lambda node: attr_getter(node, 'aspect_ratio')), diff --git a/model-optimizer/extensions/ops/priorbox_clustered.py b/model-optimizer/extensions/ops/priorbox_clustered.py index 3f9847eb5b0c95..dd3ee398ace389 100644 --- a/model-optimizer/extensions/ops/priorbox_clustered.py +++ b/model-optimizer/extensions/ops/priorbox_clustered.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ import numpy as np from mo.front.common.layout import get_width_dim, get_height_dim -from mo.front.extractor import attr_getter +from mo.front.extractor import attr_getter, bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -34,6 +34,7 @@ def __init__(self, graph: Graph, attrs: dict): 'out_ports_count': 1, 'infer': self.priorbox_clustered_infer, 'type_infer': self.type_infer, + 'clip': True, } super().__init__(graph, mandatory_props, attrs) @@ -55,9 +56,7 @@ def supported_attrs(self): def backend_attrs(self): return [ - 'flip', - 'clip', - 'img_size', + ('clip', lambda node: bool_to_str(node, 'clip')), 'img_h', 'img_w', 'step', diff --git a/model-optimizer/extensions/ops/proposal.py b/model-optimizer/extensions/ops/proposal.py index 928b32d8dc3ba8..b452324169a2ae 100644 --- a/model-optimizer/extensions/ops/proposal.py +++ b/model-optimizer/extensions/ops/proposal.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ """ from mo.front.common.partial_infer.utils import int64_array -from mo.front.extractor import attr_getter +from mo.front.extractor import attr_getter, bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -32,7 +32,9 @@ def __init__(self, graph: Graph, attrs: dict): 'infer': ProposalOp.proposal_infer, 'in_ports_count': 3, 'out_ports_count': 2, - 'normalize': 0, + 'normalize': False, + 'clip_before_nms': True, + 'clip_after_nms': False, } super().__init__(graph, mandatory_props, attrs) @@ -61,9 +63,9 @@ def backend_attrs(self): 'framework', 'box_coordinate_scale', 'box_size_scale', - 'normalize', - 'clip_after_nms', - 'clip_before_nms', + ('normalize', lambda node: bool_to_str(node, 'normalize')), + ('clip_after_nms', lambda node: bool_to_str(node, 'clip_after_nms')), + ('clip_before_nms', lambda node: bool_to_str(node, 'clip_before_nms')), ] @staticmethod diff --git a/model-optimizer/extensions/ops/psroipooling.py b/model-optimizer/extensions/ops/psroipooling.py index 249dcac01c4ef1..ef569bdb23b68b 100644 --- a/model-optimizer/extensions/ops/psroipooling.py +++ b/model-optimizer/extensions/ops/psroipooling.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -45,8 +45,6 @@ def supported_attrs(self): 'mode', 'spatial_bins_x', 'spatial_bins_y', - 'pooled_width', - 'pooled_height', ] @staticmethod diff --git a/model-optimizer/extensions/ops/regionyolo.py b/model-optimizer/extensions/ops/regionyolo.py index bab2835d7e32fd..30944715495677 100644 --- a/model-optimizer/extensions/ops/regionyolo.py +++ b/model-optimizer/extensions/ops/regionyolo.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ from mo.front.caffe.extractors.utils import get_canonical_axis_index from mo.front.common.layout import get_batch_dim, get_height_dim, get_width_dim, shape_for_layout -from mo.front.extractor import attr_getter +from mo.front.extractor import attr_getter, bool_to_str from mo.graph.graph import Node, Graph from mo.ops.op import Op @@ -56,7 +56,7 @@ def backend_attrs(self): 'num', 'axis', 'end_axis', - 'do_softmax', + ('do_softmax', lambda node: bool_to_str(node, 'do_softmax')), ('anchors', lambda node: attr_getter(node, 'anchors')), ('mask', lambda node: attr_getter(node, 'mask')) ] diff --git a/model-optimizer/extensions/ops/tensor_iterator.py b/model-optimizer/extensions/ops/tensor_iterator.py index 4e5a92e2985c08..6f268997cc9730 100644 --- a/model-optimizer/extensions/ops/tensor_iterator.py +++ b/model-optimizer/extensions/ops/tensor_iterator.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2017-2020 Intel Corporation + Copyright (C) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -254,9 +254,7 @@ def substitute_ie_attrs(self, new_attrs: dict): back_edges_attrs = [ ('from-layer', 'from_layer'), - ('from-port', 'from_port'), ('to-layer', 'to_layer'), - ('to-port', 'to_port'), ] new_attrs.update({ diff --git a/model-optimizer/mo/front/caffe/extractor.py b/model-optimizer/mo/front/caffe/extractor.py index d66dac26388de6..5570efd443206c 100644 --- a/model-optimizer/mo/front/caffe/extractor.py +++ b/model-optimizer/mo/front/caffe/extractor.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,11 +14,7 @@ limitations under the License. """ -from mo.front.caffe.extractors.batchnorm import batch_norm_ext -from mo.front.caffe.extractors.concat import concat_ext from mo.front.caffe.extractors.native_caffe import native_caffe_node_extractor -from mo.front.caffe.extractors.roipooling import roipooling_ext -from mo.front.caffe.extractors.scale import scale_ext from mo.front.common.partial_infer.elemental import copy_shape_infer from mo.front.common.register_custom_ops import extension_op_extractor from mo.front.extractor import CaffePythonFrontExtractorOp @@ -36,22 +32,8 @@ def node_pb_arg(pb_extractor): Keys are names that appear as layer names in .prototxt. Full list is available here: http://caffe.berkeleyvision.org/tutorial/layers.html """ -caffe_type_extractors = { - # Common Layers - 'dropout': node_pb_arg(lambda _, __: dict(op='Dropout', infer=copy_shape_infer)), - # Normalization Layers - 'batchnorm': node_pb_arg(batch_norm_ext), - - # Activation Layers - 'scale': node_pb_arg(scale_ext), - - # Utility Layers - 'concat': node_pb_arg(concat_ext), - - # Custom, implemented in IE, Fast-RCNN-specific - 'roipooling': node_pb_arg(roipooling_ext), -} +caffe_type_extractors = {} def common_caffe_fields(node: Node) -> dict: @@ -62,6 +44,7 @@ def common_caffe_fields(node: Node) -> dict: if isinstance(layer_type, int): layer_type = pb.LayerType.DESCRIPTOR.values_by_number[layer_type].name layer_type = str(layer_type) + return { 'kind': 'op', 'name': pb.name, diff --git a/model-optimizer/mo/front/caffe/extractors/batchnorm.py b/model-optimizer/mo/front/caffe/extractors/batchnorm.py deleted file mode 100644 index 02bb833aa0d6e9..00000000000000 --- a/model-optimizer/mo/front/caffe/extractors/batchnorm.py +++ /dev/null @@ -1,63 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -import numpy as np - -from mo.front.caffe.extractors.utils import embed_input -from mo.front.common.partial_infer.elemental import copy_shape_infer - - -def batch_norm_ext(pb_layer, pb_model): - """ - Extracts properties of the BatchNorm layer. 
- In case of scale, scale is merged into mean and variance - Args: - pb_layer: proto layer, contains own properties of the layer, i.e epsilon - pb_model: caffemodel layer, contains blobs with 0: mean, 1: variance, (opt)2: scale - - Returns: - attrs object with type, partial inference function and mean/variance properties. - """ - assert pb_layer, 'Protobuf layer can not be empty' - param = pb_layer.batch_norm_param - attrs = { - 'op': 'BatchNormalization', - 'type': 'BatchNormalization', - 'eps': param.eps, - 'infer': copy_shape_infer - } - - if not pb_model: - return attrs - - blobs = pb_model.blobs - assert len(blobs) >= 2, 'BatchNorm accepts not less then two input blobs' - mean = np.array(blobs[0].data) - variance = np.array(blobs[1].data) - - if len(blobs) == 3: - scale = blobs[2].data[0] - if scale != 0: - scale = 1.0 / scale - mean *= scale - variance *= scale - - embed_input(attrs, 1, 'gamma', np.ones(mean.shape), 'gamma') - embed_input(attrs, 2, 'beta', np.zeros(variance.shape), 'beta') - embed_input(attrs, 3, 'mean', mean, 'biases') - embed_input(attrs, 4, 'variance', variance, 'weights') - - return attrs diff --git a/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py b/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py deleted file mode 100644 index b852cdeff9c6b9..00000000000000 --- a/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py +++ /dev/null @@ -1,147 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" - -import unittest - -import numpy as np - -from mo.front.caffe.extractors.batchnorm import batch_norm_ext -from mo.front.common.partial_infer.elemental import copy_shape_infer -from mo.utils.unittest.extractors import FakeParam, FakeModelLayer - - -class FakeBNProtoLayer: - def __init__(self, eps): - self.batch_norm_param = FakeParam('eps', eps) - - -class TestShapesParsing(unittest.TestCase): - def test_bn_ext_no_ml_no_pb(self): - self.assertRaises(AssertionError, batch_norm_ext, None, None) - - def test_bn_ext_no_ml(self): - res = batch_norm_ext(FakeBNProtoLayer(10), None) - exp_res = { - 'op': 'BatchNormalization', - 'type': 'BatchNormalization', - 'eps': 10, - 'infer': copy_shape_infer - } - self.assertEqual(res, exp_res) - - def test_bn_ext_ml_one_blob(self): - self.assertRaises(AssertionError, batch_norm_ext, FakeBNProtoLayer(10), FakeModelLayer([np.array([1, 2])])) - - def test_bn_ext_ml_two_blobs(self): - mean_blob = np.array([1., 2.]) - variance_blob = np.array([3., 4.]) - blobs = [mean_blob, variance_blob] - res = batch_norm_ext(FakeBNProtoLayer(10), - FakeModelLayer(blobs)) - exp_res = { - 'type': 'BatchNormalization', - 'eps': 10, - 'infer': copy_shape_infer, - 'mean': mean_blob, - 'variance': variance_blob, - 'embedded_inputs': [ - (1, 'gamma', { - 'bin': 'gamma' - }), - (2, 'beta', { - 'bin': 'beta' - }), - (3, 'mean', { - 'bin': 'biases' - }), - (4, 'variance', { - 'bin': 'weights' - }) - ] - } - for i in exp_res: - if i in ('mean', 'variance'): - np.testing.assert_array_equal(res[i], exp_res[i]) - else: - self.assertEqual(res[i], exp_res[i]) - - def test_bn_ext_ml_three_blobs(self): - mean_blob = np.array([1., 2.]) - variance_blob = np.array([3., 4.]) - scale_blob = np.array([5., ]) - blobs = [mean_blob, variance_blob, scale_blob] - res = batch_norm_ext(FakeBNProtoLayer(10), - FakeModelLayer(blobs)) - exp_res = { - 'type': 'BatchNormalization', - 'eps': 10, - 'infer': copy_shape_infer, - 'mean': mean_blob * 0.2, - 'variance': variance_blob * 0.2, - 'embedded_inputs': [ - (1, 'gamma', { - 'bin': 'gamma' - }), - (2, 'beta', { - 'bin': 'beta' - }), - (3, 'mean', { - 'bin': 'biases' - }), - (4, 'variance', { - 'bin': 'weights' - }) - ] - } - for i in exp_res: - if i in ('mean', 'variance'): - np.testing.assert_array_equal(res[i], exp_res[i]) - else: - self.assertEqual(res[i], exp_res[i]) - - def test_bn_ext_ml_three_blobs_zero_scale(self): - mean_blob = np.array([1., 2.]) - variance_blob = np.array([3., 4.]) - scale_blob = np.array([0., ]) - blobs = [mean_blob, variance_blob, scale_blob] - res = batch_norm_ext(FakeBNProtoLayer(10), - FakeModelLayer(blobs)) - exp_res = { - 'type': 'BatchNormalization', - 'eps': 10, - 'infer': copy_shape_infer, - 'mean': mean_blob * 0., - 'variance': variance_blob * 0., - 'embedded_inputs': [ - (1, 'gamma', { - 'bin': 'gamma' - }), - (2, 'beta', { - 'bin': 'beta' - }), - (3, 'mean', { - 'bin': 'biases' - }), - (4, 'variance', { - 'bin': 'weights' - }) - ] - } - for i in exp_res: - if i in ('mean', 'variance'): - np.testing.assert_array_equal(res[i], exp_res[i]) - else: - self.assertEqual(res[i], exp_res[i]) \ No newline at end of file diff --git a/model-optimizer/mo/front/caffe/extractors/concat_test.py b/model-optimizer/mo/front/caffe/extractors/concat_test.py deleted file mode 100644 index 1840c029740455..00000000000000 --- a/model-optimizer/mo/front/caffe/extractors/concat_test.py +++ /dev/null @@ -1,37 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may 
not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -import unittest - -from mo.front.caffe.extractors.concat import concat_ext -from mo.front.common.partial_infer.concat import concat_infer -from mo.utils.unittest.extractors import FakeParam - - -class FakeProtoLayer: - def __init__(self, axis): - self.concat_param = FakeParam('axis', axis) - - -class TestConcat(unittest.TestCase): - def test_concat(self): - res = concat_ext(FakeProtoLayer(10), None) - exp_res = { - 'axis': 10, - 'infer': concat_infer, - 'type': 'Concat' - } - self.assertEqual(res, exp_res) diff --git a/model-optimizer/mo/front/caffe/extractors/scale.py b/model-optimizer/mo/front/caffe/extractors/scale.py deleted file mode 100644 index 59a2efd3a4b8b7..00000000000000 --- a/model-optimizer/mo/front/caffe/extractors/scale.py +++ /dev/null @@ -1,47 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -import numpy as np - -from mo.front.caffe.extractors.utils import embed_input, weights_biases -from mo.front.common.partial_infer.elemental import copy_shape_infer -from mo.utils.utils import NamedAttrsClass - - -def scale_ext(pl, ml): - param = pl.scale_param - attrs = { - 'op': 'ScaleShift', - 'type': 'ScaleShift', - 'axis': param.axis, - 'infer': copy_shape_infer - } - if ml is None and len(pl.bottom) == 1: - # default weights and biases for scale layer if the caffemodel file doesn't contain them - ml = NamedAttrsClass({'blobs': np.array([NamedAttrsClass({'data': np.array([1])}), - NamedAttrsClass({'data': np.array([0])})])}) - # scale with 1 input and 1 or 2 blobs - if ml and len(ml.blobs) != 0 and len(pl.bottom) == 1: - attrs.update(weights_biases(param.bias_term, ml)) - # 2 inputs + bias - elif len(pl.bottom) == 2 and param.bias_term: - if ml is None or len(ml.blobs) == 0: - # default bias for scale layer with 2 inputs if the caffemodel file doesn't contain them - ml = NamedAttrsClass({'blobs': np.array([NamedAttrsClass({'data': np.array([0])})])}) - - embed_input(attrs, 1, 'biases', ml.blobs[0].data) - - return attrs diff --git a/model-optimizer/mo/front/caffe/extractors/scale_test.py b/model-optimizer/mo/front/caffe/extractors/scale_test.py deleted file mode 100644 index f1c1baf84c69c9..00000000000000 --- a/model-optimizer/mo/front/caffe/extractors/scale_test.py +++ /dev/null @@ -1,144 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -import unittest - -import numpy as np - -from mo.front.caffe.extractors.scale import scale_ext -from mo.front.common.partial_infer.elemental import copy_shape_infer -from mo.utils.unittest.extractors import FakeMultiParam, FakeModelLayer - - -class FakeProtoLayer: - def __init__(self, val, bottom2=False): - self.scale_param = val - if bottom2: - self.bottom = {"bottom1", "bottom2"} - else: - self.bottom = {"bottom1"} - - -class TestScale(unittest.TestCase): - def test_scale_ext(self): - mean_blob = np.array([1., 2.]) - variance_blob = np.array([3., 4.]) - blobs = [mean_blob, variance_blob] - params = { - 'type': 'Scale', - 'axis': 0, - 'bias_term': True - } - - res = scale_ext(FakeProtoLayer(FakeMultiParam(params)), FakeModelLayer(blobs)) - exp_res = { - 'op': 'ScaleShift', - 'type': 'ScaleShift', - 'axis': 0, - 'infer': copy_shape_infer, - 'weights': mean_blob, - 'biases': variance_blob, - 'embedded_inputs': [ - (1, 'weights', { - 'bin': 'weights' - }), - (2, 'biases', { - 'bin': 'biases' - }) - ] - } - for i in exp_res: - if i in ('weights', 'biases'): - np.testing.assert_array_equal(res[i], exp_res[i]) - else: - self.assertEqual(res[i], exp_res[i]) - - def test_scale_2inputs_ext(self): - params = { - 'type': 'Scale', - 'axis': 0, - 'bias_term': False - } - - res = scale_ext(FakeProtoLayer(FakeMultiParam(params), True), None) - exp_res = { - 'op': 'ScaleShift', - 'type': 'ScaleShift', - 'axis': 0, - 'infer': copy_shape_infer, - } - for i in exp_res: - self.assertEqual(res[i], exp_res[i]) - - def test_scale_2inputs_bias_ext(self): - variance_blob = np.array([3., 4.]) - blobs = [variance_blob] - - params = { - 'type': 'Scale', - 'axis': 0, - 'bias_term': True - } - - res = scale_ext(FakeProtoLayer(FakeMultiParam(params), True), FakeModelLayer(blobs)) - exp_res = { - 'op': 'ScaleShift', - 'type': 'ScaleShift', - 'axis': 0, - 'infer': copy_shape_infer, - 'biases': variance_blob, - 'embedded_inputs': [ - (1, 'biases', { - 'bin': 'biases' - })] - } - for i in exp_res: - if i in ('biases'): - np.testing.assert_array_equal(res[i], exp_res[i]) - else: - self.assertEqual(res[i], exp_res[i]) - - def test_create_default_weights(self): - """ - There are situations when scale layer doesn't have weights and biases. This test checks that if they are not - available in the caffemodel file then default values [1] and [0] are generated. 
- """ - scale_blob = np.array([1]) - bias_blob = np.array([0]) - params = { - 'type': 'Scale', - 'axis': 0, - 'bias_term': True - } - - res = scale_ext(FakeProtoLayer(FakeMultiParam(params)), None) - exp_res = { - 'op': 'ScaleShift', - 'type': 'ScaleShift', - 'axis': 0, - 'infer': copy_shape_infer, - 'weights': scale_blob, - 'biases': bias_blob, - 'embedded_inputs': [ - (1, 'weights', { - 'bin': 'weights' - }), - (2, 'biases', { - 'bin': 'biases' - }) - ] - } - self.assertDictEqual(exp_res, res) diff --git a/model-optimizer/mo/front/extractor.py b/model-optimizer/mo/front/extractor.py index b94badaa45ef3c..19679614d0f80a 100644 --- a/model-optimizer/mo/front/extractor.py +++ b/model-optimizer/mo/front/extractor.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -131,6 +131,20 @@ def attr_getter(node: Node, name: str): return None +def bool_to_str(node: Node, attr: str): + # Function converts 0/1 or bool False/True values to str 'false'/'true' which need to appear in IR + attribute_name = node.soft_get(attr, None) + if attribute_name is None: + return None + if isinstance(attribute_name, bool): + return str(attribute_name).lower() + elif attribute_name in [0, 1]: + return str(bool(attribute_name)).lower() + else: + raise Error('Wrong value {} for boolean attribute {} in node {}'.format( + attribute_name, attr, node.soft_get('name'))) + + def kernel_getter(node: Node, dim: int): if node.kind == 'op' and node.op in ['Conv2D', 'DepthwiseConv2dNative', 'Deconv2D']: if node.has('kernel_spatial'): diff --git a/model-optimizer/mo/front/extractor_test.py b/model-optimizer/mo/front/extractor_test.py index 44f4b3b25058d9..0950ddc9e963df 100644 --- a/model-optimizer/mo/front/extractor_test.py +++ b/model-optimizer/mo/front/extractor_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ from mo.front.extractor import input_user_data_repack, output_user_data_repack, update_ie_fields, add_input_op, \ get_node_id_with_ports from mo.front.extractor import spatial_attr_getter, add_input_ops, attr_getter, CaffePythonFrontExtractorOp, \ - add_output_ops + add_output_ops, bool_to_str from mo.graph.graph import Node from mo.utils.error import Error from mo.utils.ir_engine.compare_graphs import compare_graphs @@ -672,3 +672,18 @@ def test_get_attrs(self): param_str = "'test_attr_1': 12, 'test_attr_2': 'sdf sdf'" attrs = CaffePythonFrontExtractorOp.get_attrs(FakePythonParam(FakeMultiParam({'param_str': param_str}))) self.assertEqual(exp_attrs, attrs) + +class TestBoolToSrtFunction(unittest.TestCase): + def test_bool_to_str(self): + graph = build_graph(nodes_attributes, + [('input', 'pool_1'), + ('pool_1', 'output'), + ('output', 'op_output') + ], + {'pool_1': {'bool_attr': None} + }) + pool_1_node = Node(graph, 'pool_1') + attrs = [(True, 'true'), (False, 'false'), (1, 'true'), (0, 'false')] + for attr in attrs: + pool_1_node.bool_attr = attr[0] + self.assertEqual(attr[1], bool_to_str(pool_1_node, 'bool_attr')) diff --git a/model-optimizer/mo/middle/passes/convert_data_type.py b/model-optimizer/mo/middle/passes/convert_data_type.py index 4fdc254cf814df..231be0bca84328 100644 --- a/model-optimizer/mo/middle/passes/convert_data_type.py +++ b/model-optimizer/mo/middle/passes/convert_data_type.py @@ -39,6 +39,7 @@ class packed_U1(np.generic): 'FP16': (np.float16, 'FP16', 'f16'), 'I32': (np.int32, 'I32', 'i32'), 'I64': (np.int64, 'I64', 'i64'), + 'int8': (np.int8, 'I8', 'i8'), 'uint8': (np.uint8, 'U8', 'u8'), 'int32': (np.int32, 'I32', 'i32'), 'int64': (np.int64, 'I64', 'i64'), diff --git a/model-optimizer/mo/ops/deformable_convolution.py b/model-optimizer/mo/ops/deformable_convolution.py index e2e3f436a350b7..3c4860d3218cbe 100644 --- a/model-optimizer/mo/ops/deformable_convolution.py +++ b/model-optimizer/mo/ops/deformable_convolution.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,6 +28,8 @@ def __init__(self, graph: Graph, attrs: dict): 'op': __class__.op, 'version': 'opset1', 'infer': Convolution.infer, + 'group': 1, + 'deformable_group': 1, 'multiplication_transparent': True, 'multiplication_transparent_ports': [(0, 0), (2, 0)], 'in_ports_count': 3, diff --git a/model-optimizer/mo/ops/pooling.py b/model-optimizer/mo/ops/pooling.py index f8bc2c7f922b38..01c2f789fe6749 100644 --- a/model-optimizer/mo/ops/pooling.py +++ b/model-optimizer/mo/ops/pooling.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ from mo.graph.graph import Node, Graph from mo.ops.op import Op, PermuteAttrs from mo.utils.error import Error +from mo.front.extractor import bool_to_str class Pooling(Op): @@ -44,8 +45,7 @@ def backend_attrs(self): ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))), ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))), - ('pool-method', 'pool_method'), - ('exclude-pad', 'exclude_pad'), + ('exclude-pad', lambda node: bool_to_str(node, 'exclude_pad')), 'rounding_type', ('auto_pad', lambda node: node.auto_pad if node.has_valid('auto_pad') else 'explicit'), diff --git a/model-optimizer/mo/ops/pooling_test.py b/model-optimizer/mo/ops/pooling_test.py index 456e31b92669b8..ab365ec06fe00d 100644 --- a/model-optimizer/mo/ops/pooling_test.py +++ b/model-optimizer/mo/ops/pooling_test.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ def test_pooling_infer(self): 'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]), 'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]), 'pad_spatial_shape': np.array([[3, 3], [3, 3]]), - 'pool_method': 'avg', 'exclude_pad': 'false', 'global_pool': 0, + 'pool_method': 'avg', 'exclude_pad': False, 'global_pool': False, 'output_spatial_shape': None, 'output_shape': None, 'kernel_spatial': np.array([3, 3]), 'spatial_dims': np.array([2, 3]), 'channel_dims': np.array([1]), 'batch_dims': np.array([0]), @@ -68,7 +68,7 @@ def test_pooling_infer_decrement_input_spatial(self): 'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 3, 3]), 'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]), 'pad_spatial_shape': np.array([[1, 1], [1, 1]]), - 'pool_method': 'avg', 'exclude_pad': 'false', 'global_pool': 0, + 'pool_method': 'avg', 'exclude_pad': False, 'global_pool': False, 'output_spatial_shape': None, 'output_shape': None, 'kernel_spatial': np.array([3, 3]), 'spatial_dims': np.array([2, 3]), 'channel_dims': np.array([1]), 'batch_dims': np.array([0]), @@ -94,7 +94,7 @@ def test_pooling_infer_no_convention(self): 'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]), 'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]), 'pad_spatial_shape': np.array([[3, 3], [3, 3]]), - 'pool_method': 'avg', 'exclude_pad': 'false', 'global_pool': 0, + 'pool_method': 'avg', 'exclude_pad': False, 'global_pool': False, 'output_spatial_shape': None, 'output_shape': None, 'kernel_spatial': np.array([3, 3]), 'spatial_dims': np.array([2, 3]), 'channel_dims': np.array([1]), 'batch_dims': np.array([0])} @@ -119,7 +119,7 @@ def test_pooling_infer_no_shape(self): 'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]), 'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]), 'pad_spatial_shape': np.array([[3, 3], [3, 3]]), - 'pool_method': 'avg', 'exclude_pad': 'false', + 'pool_method': 'avg', 'exclude_pad': False, 'output_spatial_shape': None, 'output_shape': None, 'kernel_spatial': np.array([3, 3]), 'spatial_dims': np.array([2, 3]), 'channel_dims': np.array([1]), 'batch_dims': np.array([0]), @@ -142,7 +142,7 @@ def test_pooling_infer_wrong_input_shape(self): 'pool': {'window': np.array([1, 1, 5, 5]), 'stride': np.array([1, 1, 2, 2]), 'pad': np.array([[0, 0], [0, 0], [1, 1], [1, 1]]), 'pad_spatial_shape': np.array([[1, 1], [1, 1]]), - 
'pool_method': 'avg', 'exclude_pad': 'false', 'global_pool': 0, + 'pool_method': 'avg', 'exclude_pad': False, 'global_pool': False, 'output_spatial_shape': None, 'output_shape': None, 'kernel_spatial': np.array([3, 3]), 'spatial_dims': np.array([2, 3]), 'channel_dims': np.array([1]), 'batch_dims': np.array([0]), diff --git a/model-optimizer/mo/ops/reshape.py b/model-optimizer/mo/ops/reshape.py index 55329533cd0fa7..018b1d68935513 100644 --- a/model-optimizer/mo/ops/reshape.py +++ b/model-optimizer/mo/ops/reshape.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ """ import numpy as np +from mo.front.extractor import bool_to_str from mo.graph.graph import Node, Graph from mo.graph.perm_inputs import PermuteInputs from mo.ops.op import Op @@ -40,7 +41,7 @@ def __init__(self, graph: Graph, attrs: dict): }, attrs) def supported_attrs(self): - return ['special_zero'] + return [('special_zero', lambda node: bool_to_str(node, 'special_zero'))] @staticmethod def infer(node: Node): diff --git a/model-optimizer/mo/ops/roipooling.py b/model-optimizer/mo/ops/roipooling.py index 713953990426a8..6c48639769a973 100644 --- a/model-optimizer/mo/ops/roipooling.py +++ b/model-optimizer/mo/ops/roipooling.py @@ -1,5 +1,5 @@ """ - Copyright (C) 2018-2020 Intel Corporation + Copyright (C) 2018-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ def __init__(self, graph, attrs: dict): 'pooled_h': None, 'pooled_w': None, 'spatial_scale': 0.0625, + 'method': 'max', 'infer': roipooling_infer, 'in_ports_count': 2, 'out_ports_count': 1, diff --git a/model-optimizer/mo/utils/unittest/graph.py b/model-optimizer/mo/utils/unittest/graph.py index 7e95dd6da9125c..21bb0a9b1606ef 100644 --- a/model-optimizer/mo/utils/unittest/graph.py +++ b/model-optimizer/mo/utils/unittest/graph.py @@ -287,7 +287,8 @@ def __getitem__(self, item): 'shape': int64_array(shape) if shape is not None else None}} empty_data = lambda name: valued_data(name, None) -result = lambda name=None: {name if name is not None else 'output': {'kind': 'op', 'type': 'Result', 'op': 'Result'}} +result = lambda name=None: {name if name is not None else 'output': {'kind': 'op', 'type': 'Result', 'op': 'Result', + 'infer': lambda x: 0}} regular_op_with_shaped_data = lambda name, shape, kwargs: {**regular_op(name, kwargs), **shaped_data(name + '_d', shape)} diff --git a/ngraph/cmake/external_onnx.cmake b/ngraph/cmake/external_onnx.cmake index 54c5000f036c4a..48c9106560199f 100644 --- a/ngraph/cmake/external_onnx.cmake +++ b/ngraph/cmake/external_onnx.cmake @@ -20,7 +20,7 @@ include(FetchContent) # ONNX.proto definition version #------------------------------------------------------------------------------ -set(ONNX_VERSION 1.8.0) +set(ONNX_VERSION 1.8.1) #------------------------------------------------------------------------------ # Download and install libonnx ... 
diff --git a/ngraph/core/include/ngraph/descriptor/output.hpp b/ngraph/core/include/ngraph/descriptor/output.hpp index c7c2fc875a6ba3..611961c5e3e3bc 100644 --- a/ngraph/core/include/ngraph/descriptor/output.hpp +++ b/ngraph/core/include/ngraph/descriptor/output.hpp @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include #include "ngraph/descriptor/input.hpp" diff --git a/ngraph/core/include/ngraph/descriptor/tensor.hpp b/ngraph/core/include/ngraph/descriptor/tensor.hpp index 123b2ec507b66f..4fe09f987585bb 100644 --- a/ngraph/core/include/ngraph/descriptor/tensor.hpp +++ b/ngraph/core/include/ngraph/descriptor/tensor.hpp @@ -18,6 +18,7 @@ #include #include +#include #include "ngraph/partial_shape.hpp" #include "ngraph/shape.hpp" @@ -27,6 +28,11 @@ namespace ngraph { class Node; + namespace runtime + { + class HostTensor; + } + using HostTensorPtr = std::shared_ptr; namespace descriptor { /// \brief Compile-time descriptor of a first-class value that is a tensor. @@ -44,15 +50,33 @@ namespace ngraph Node* node, size_t node_output_number); + NGRAPH_DEPRECATED("get_name() is deprecated! Please use get_names() instead.") const std::string& get_name() const; + NGRAPH_DEPRECATED("set_name() is deprecated! Please use set_names() instead.") void set_name(const std::string& name); + + const std::unordered_set& get_names() const; + void set_names(const std::unordered_set& names); void set_tensor_type(const element::Type& element_type, const PartialShape& pshape); void set_element_type(const element::Type& elemenet_type); void set_partial_shape(const PartialShape& partial_shape); + /// \brief sets lower bound value description + void set_lower_value(const HostTensorPtr& value); + /// \brief sets upper bound value description + void set_upper_value(const HostTensorPtr& value); + /// \brief unsets bound value descriptions + void invalidate_values(); + const element::Type& get_element_type() const { return m_element_type; } const Shape& get_shape() const; const PartialShape& get_partial_shape() const { return m_partial_shape; } + HostTensorPtr get_lower_value() const { return m_lower_value; } + HostTensorPtr get_upper_value() const { return m_upper_value; } + bool has_and_set_bound() const + { + return m_upper_value != nullptr && m_upper_value == m_lower_value; + } size_t size() const; protected: @@ -65,9 +89,11 @@ namespace ngraph Shape m_shape; PartialShape m_partial_shape; Node* m_node{nullptr}; + HostTensorPtr m_lower_value, m_upper_value; size_t m_node_output_number{0}; std::string m_name; + std::unordered_set m_names; }; NGRAPH_API diff --git a/ngraph/core/include/ngraph/function.hpp b/ngraph/core/include/ngraph/function.hpp index 922cfcd7966162..f6c84c9cef3fad 100644 --- a/ngraph/core/include/ngraph/function.hpp +++ b/ngraph/core/include/ngraph/function.hpp @@ -170,10 +170,39 @@ namespace ngraph /// \param result Result node to delete void remove_result(const std::shared_ptr& result); + /// \brief Add new Parameter nodes to the list. + /// + /// Method doesn't change or validate graph, it should be done manually. + /// For example, if you want to replace `ReadValue` node by `Parameter`, you should do the + /// following steps: + /// * replace node `ReadValue` by `Parameter` in graph + /// * call add_parameter() to add new input to the list + /// * call graph validation to check correctness of changes + /// + /// \param params new Parameter nodes + void add_parameters(const ParameterVector& params); + + /// \brief Delete Parameter node from the list of parameters. 
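// [Editorial sketch] The Function::add_parameters()/remove_parameter() doc comments in the
// function.hpp hunk above describe a manual ReadValue <-> Parameter swap (the remove_parameter
// steps continue right after this sketch). A minimal illustration of that workflow follows;
// the helper name, the use of opset5 and the variable names are assumptions made for this
// example and are not part of the patch itself.
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/graph_util.hpp>    // ngraph::replace_node
#include <ngraph/opsets/opset5.hpp>

void read_value_to_parameter(const std::shared_ptr<ngraph::Function>& f,
                             const std::shared_ptr<ngraph::opset5::ReadValue>& read_value)
{
    // Replace the ReadValue node by a Parameter with the same element type and shape.
    auto param = std::make_shared<ngraph::opset5::Parameter>(
        read_value->get_output_element_type(0), read_value->get_output_partial_shape(0));
    ngraph::replace_node(read_value, param);

    // Register the new input; add_parameters() itself performs no graph validation.
    f->add_parameters({param});

    // Validate the graph manually after the edit, as the doc comment requires.
    f->validate_nodes_and_infer_types();
}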
Method will not delete node + /// from graph. You need to replace Parameter with other operation manually. + /// Attention: Indexing of parameters can be changed. + /// + /// Possible use of method is to replace input by variable. For it the following steps + /// should be done: + /// * `Parameter` node should be replaced by `ReadValue` + /// * call remove_parameter(param) to remove input from the list + /// * check if any parameter indexes are saved/used somewhere, update it for all inputs + /// because indexes can be changed + /// * call graph validation to check all changes + /// + /// \param param Parameter node to delete + void remove_parameter(const std::shared_ptr& param); + private: Function(const Function&) = delete; Function(const Function&&) = delete; Function& operator=(const Function&) = delete; + /// \brief Checks all the Parameter nodes are registered in the list of Function parameters + void check_all_parameters_registered() const; static std::atomic m_next_instance_id; std::string m_name; @@ -203,4 +232,4 @@ namespace ngraph 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } }; -} +} // namespace ngraph diff --git a/ngraph/core/include/ngraph/node.hpp b/ngraph/core/include/ngraph/node.hpp index 628a9c26866bea..a8538319754fc7 100644 --- a/ngraph/core/include/ngraph/node.hpp +++ b/ngraph/core/include/ngraph/node.hpp @@ -208,6 +208,9 @@ namespace ngraph /// \returns true if successful virtual bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const; + virtual bool evaluate_lower(const HostTensorVector& output_values) const; + virtual bool evaluate_upper(const HostTensorVector& output_values) const; + virtual bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values); /// \brief Decomposes the FusedOp into a sub-graph consisting of core ngraph ops /// @@ -233,7 +236,12 @@ namespace ngraph /// Sets the number of outputs void set_output_size(size_t output_size); - void revalidate_and_infer_types() { validate_and_infer_types(); } + void invalidate_values(); + void revalidate_and_infer_types() + { + invalidate_values(); + validate_and_infer_types(); + } /// \brief Get the string name for the type of the node, such as `Add` or `Multiply`. /// The class name, must not contain spaces as it is used for codegen. /// \returns A const reference to the node's type name @@ -327,6 +335,8 @@ namespace ngraph descriptor::Tensor& get_input_tensor(size_t i) const; /// Returns the tensor name for output i + NGRAPH_DEPRECATED( + "The tensor name was deprecated. Use get_output_tensor(i).get_names() instead.") const std::string& get_output_tensor_name(size_t i) const; std::set> get_output_target_inputs(size_t i) const; @@ -347,6 +357,8 @@ namespace ngraph const PartialShape& get_input_partial_shape(size_t i) const; /// Returns the tensor name for input i + NGRAPH_DEPRECATED( + "The tensor name was deprecated. 
Use get_input_tensor(i).get_names() instead.") const std::string& get_input_tensor_name(size_t i) const; std::unordered_set liveness_new_list; diff --git a/ngraph/core/include/ngraph/node_output.hpp b/ngraph/core/include/ngraph/node_output.hpp index 359a1441cfc4b2..bcaed7812d3b2e 100644 --- a/ngraph/core/include/ngraph/node_output.hpp +++ b/ngraph/core/include/ngraph/node_output.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include "ngraph/descriptor/tensor.hpp" #include "ngraph/partial_shape.hpp" diff --git a/ngraph/core/include/ngraph/op/concat.hpp b/ngraph/core/include/ngraph/op/concat.hpp index f0c99c6c9b544b..db964d3059960b 100644 --- a/ngraph/core/include/ngraph/op/concat.hpp +++ b/ngraph/core/include/ngraph/op/concat.hpp @@ -63,6 +63,8 @@ namespace ngraph void set_axis(int64_t axis) { m_axis = axis; } bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& output_values) const override; + bool evaluate_upper(const HostTensorVector& output_values) const override; protected: /// \ brief m_axis stores default value for all iterations diff --git a/ngraph/core/include/ngraph/op/constant.hpp b/ngraph/core/include/ngraph/op/constant.hpp index b411e2caa3da49..93724d8cf16335 100644 --- a/ngraph/core/include/ngraph/op/constant.hpp +++ b/ngraph/core/include/ngraph/op/constant.hpp @@ -54,7 +54,9 @@ namespace ngraph /// \param values A vector of literals for initializing the tensor constant. The /// size of values must match the size of the shape. template - Constant(const element::Type& type, Shape shape, const std::vector& values) + Constant(const element::Type& type, + const Shape& shape, + const std::vector& values) : Constant(type, shape) { NODE_VALIDATION_CHECK( @@ -91,7 +93,7 @@ namespace ngraph /// value is broadcast to the specified shape. template ::value>::type> - Constant(const element::Type& type, Shape shape, T value) + Constant(const element::Type& type, const Shape& shape, T value) : Constant(type, shape) { auto size = shape_size(m_shape); @@ -224,7 +226,7 @@ namespace ngraph /// \param shape The shape of the tensor constant. /// \param values A list of string values to use as the constant data. 
Constant(const element::Type& type, - Shape shape, + const Shape& shape, const std::vector& values); /// \brief Constructs a tensor constant with the supplied data @@ -265,6 +267,8 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; // Don't constant fold a constant; it would make a copy bool constant_fold(OutputVector& outputs, const OutputVector& inputs) override diff --git a/ngraph/core/include/ngraph/op/convert.hpp b/ngraph/core/include/ngraph/op/convert.hpp index e527dfeb43ebf4..651b68e027544b 100644 --- a/ngraph/core/include/ngraph/op/convert.hpp +++ b/ngraph/core/include/ngraph/op/convert.hpp @@ -56,6 +56,8 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; protected: ngraph::element::Type m_destination_type; diff --git a/ngraph/core/include/ngraph/op/gather.hpp b/ngraph/core/include/ngraph/op/gather.hpp index 3c8a34927c5186..dbc72643333068 100644 --- a/ngraph/core/include/ngraph/op/gather.hpp +++ b/ngraph/core/include/ngraph/op/gather.hpp @@ -49,6 +49,8 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; diff --git a/ngraph/core/include/ngraph/op/loop.hpp b/ngraph/core/include/ngraph/op/loop.hpp index 1f1daee45d5c3b..1a4fc794ede13e 100644 --- a/ngraph/core/include/ngraph/op/loop.hpp +++ b/ngraph/core/include/ngraph/op/loop.hpp @@ -86,7 +86,12 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + protected: + Loop(const Loop&); + private: + void clone_to(Loop& dst, const OutputVector& new_args) const; + SpecialBodyPorts m_special_body_ports; int64_t m_num_iterations = -1; // -1 means infinity }; diff --git a/ngraph/core/include/ngraph/op/min.hpp b/ngraph/core/include/ngraph/op/min.hpp index bed5ba3efac13a..59d2b2e34bb77d 100644 --- a/ngraph/core/include/ngraph/op/min.hpp +++ b/ngraph/core/include/ngraph/op/min.hpp @@ -46,6 +46,8 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; }; } } diff --git a/ngraph/core/include/ngraph/op/reduce_prod.hpp b/ngraph/core/include/ngraph/op/reduce_prod.hpp index 44c620007b272f..7543b92eb50808 100644 --- a/ngraph/core/include/ngraph/op/reduce_prod.hpp +++ b/ngraph/core/include/ngraph/op/reduce_prod.hpp @@ -52,6 +52,8 @@ namespace ngraph bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; }; } } diff --git a/ngraph/core/include/ngraph/op/reshape.hpp b/ngraph/core/include/ngraph/op/reshape.hpp index 761c2ee42e7f70..d0951c005bcb42 100644 --- a/ngraph/core/include/ngraph/op/reshape.hpp +++ 
b/ngraph/core/include/ngraph/op/reshape.hpp @@ -66,6 +66,8 @@ namespace ngraph void set_special_zero(bool special_zero) { m_special_zero = special_zero; } bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; diff --git a/ngraph/core/include/ngraph/op/shape_of.hpp b/ngraph/core/include/ngraph/op/shape_of.hpp index 52a656a24b0b3b..9eeb87768a1925 100644 --- a/ngraph/core/include/ngraph/op/shape_of.hpp +++ b/ngraph/core/include/ngraph/op/shape_of.hpp @@ -55,6 +55,8 @@ namespace ngraph bool get_is_foldable() const { return m_is_foldable; } bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + bool evaluate_lower(const HostTensorVector& output_values) const override; + bool evaluate_upper(const HostTensorVector& output_values) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; @@ -91,6 +93,8 @@ namespace ngraph bool get_is_foldable() const { return m_is_foldable; } bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + bool evaluate_lower(const HostTensorVector& output_values) const override; + bool evaluate_upper(const HostTensorVector& output_values) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; diff --git a/ngraph/core/include/ngraph/op/squeeze.hpp b/ngraph/core/include/ngraph/op/squeeze.hpp index 8a118df26ccce6..6b64d46549cc56 100644 --- a/ngraph/core/include/ngraph/op/squeeze.hpp +++ b/ngraph/core/include/ngraph/op/squeeze.hpp @@ -44,6 +44,8 @@ namespace ngraph virtual void pre_validate_and_infer_types() override; bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; diff --git a/ngraph/core/include/ngraph/op/strided_slice.hpp b/ngraph/core/include/ngraph/op/strided_slice.hpp index fe0c048e28cef6..8611273e9255aa 100644 --- a/ngraph/core/include/ngraph/op/strided_slice.hpp +++ b/ngraph/core/include/ngraph/op/strided_slice.hpp @@ -105,6 +105,8 @@ namespace ngraph size_t get_version() const override { return 1; } bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; private: AxisSet convert_mask_to_axis_set(const std::vector& mask) const; diff --git a/ngraph/core/include/ngraph/op/unsqueeze.hpp b/ngraph/core/include/ngraph/op/unsqueeze.hpp index 14b3144cb11ff6..46529757794d35 100644 --- a/ngraph/core/include/ngraph/op/unsqueeze.hpp +++ b/ngraph/core/include/ngraph/op/unsqueeze.hpp @@ -40,6 +40,9 @@ namespace ngraph bool visit_attributes(AttributeVisitor& visitor) override; bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool evaluate_lower(const HostTensorVector& output_values) const override; + bool evaluate_upper(const HostTensorVector& output_values) const override; + bool 
constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; diff --git a/ngraph/core/include/ngraph/op/util/binary_elementwise_arithmetic.hpp b/ngraph/core/include/ngraph/op/util/binary_elementwise_arithmetic.hpp index 62d0dd6950a513..1eb0b18e33c022 100644 --- a/ngraph/core/include/ngraph/op/util/binary_elementwise_arithmetic.hpp +++ b/ngraph/core/include/ngraph/op/util/binary_elementwise_arithmetic.hpp @@ -72,6 +72,8 @@ namespace ngraph const AutoBroadcastSpec& get_autob() const override { return m_autob; } void set_autob(const AutoBroadcastSpec& autob) { m_autob = autob; } bool visit_attributes(AttributeVisitor& visitor) override; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; private: AutoBroadcastSpec m_autob; diff --git a/ngraph/core/include/ngraph/op/util/broadcast_base.hpp b/ngraph/core/include/ngraph/op/util/broadcast_base.hpp index 8e72c3ddb8d3df..c6c8b2eac045fc 100644 --- a/ngraph/core/include/ngraph/op/util/broadcast_base.hpp +++ b/ngraph/core/include/ngraph/op/util/broadcast_base.hpp @@ -74,14 +74,16 @@ namespace ngraph bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const AxisSet& broadcast_axes) const; + bool evaluate_lower(const HostTensorVector& outputs) const override; + bool evaluate_upper(const HostTensorVector& outputs) const override; PartialShape get_result_shape_pdpd(const PartialShape& arg0_shape, - const Shape& target_shape, + const PartialShape& target_shape, const op::BroadcastModeSpec& broadcast_spec) const; void validate_target_shape_numpy(const PartialShape& arg_shape, - const Shape& target_shape) const; + const PartialShape& target_shape) const; static std::pair get_broadcast_axes_numpy_pdpd(const Shape& arg_shape, @@ -92,9 +94,9 @@ namespace ngraph get_broadcast_axes_none(const AxisVector axes_mapping_val, const size_t target_shape); - void validate_target_shape_none(const Shape& arg_shape, + void validate_target_shape_none(const PartialShape& arg_shape, const AxisVector& axes_mapping_val, - const Shape& target_shape) const; + const PartialShape& target_shape) const; Shape get_target_shape(const HostTensorPtr& input1) const; }; diff --git a/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp b/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp index b8a2af05544d7b..2dc91539117d6c 100644 --- a/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp +++ b/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp @@ -218,6 +218,7 @@ namespace ngraph }; virtual std::shared_ptr get_function() { return m_body; }; + virtual std::shared_ptr get_function() const { return m_body; }; virtual void set_function(const std::shared_ptr& func) { m_body = func; }; /// \return a reference to the input descriptions. const std::vector>& get_input_descriptions() const @@ -319,6 +320,12 @@ namespace ngraph int64_t end, int64_t axis); + SubGraphOp(const SubGraphOp&) = delete; + SubGraphOp(SubGraphOp&&) = default; + + SubGraphOp& operator=(const SubGraphOp&) = delete; + SubGraphOp& operator=(SubGraphOp&&) = default; + protected: // Find an input corresponding to value, adding one if necessary. 
Input input_for_value(const Output& value); diff --git a/ngraph/core/include/ngraph/pass/constant_folding.hpp b/ngraph/core/include/ngraph/pass/constant_folding.hpp index 8119e6da4cb094..f36874bd4e55bb 100644 --- a/ngraph/core/include/ngraph/pass/constant_folding.hpp +++ b/ngraph/core/include/ngraph/pass/constant_folding.hpp @@ -36,6 +36,9 @@ namespace ngraph private: void copy_runtime_info_to_target_inputs(const std::shared_ptr& node, const Output& replacement); + /// \brief Folds pre-calculated output tensor values to constants in case lower and + /// upper estimations are equal. Traverses graph backwards starting from the results. + bool pre_calculated_values_folding(const std::shared_ptr& f); }; } // namespace pass } // namespace ngraph diff --git a/ngraph/core/include/ngraph/pattern/op/wrap_type.hpp b/ngraph/core/include/ngraph/pattern/op/wrap_type.hpp index 3c95c7e4300ce3..77307b7ee670c4 100644 --- a/ngraph/core/include/ngraph/pattern/op/wrap_type.hpp +++ b/ngraph/core/include/ngraph/pattern/op/wrap_type.hpp @@ -36,7 +36,17 @@ namespace ngraph [](const Output& output) { return true; }, const OutputVector& input_values = {}) : Pattern(input_values, pred) - , m_wrapped_type(wrapped_type) + , m_wrapped_types({wrapped_type}) + { + set_output_type(0, element::Type_t::dynamic, PartialShape::dynamic()); + } + + explicit WrapType(std::vector wrapped_types, + const ValuePredicate& pred = + [](const Output& output) { return true; }, + const OutputVector& input_values = {}) + : Pattern(input_values, pred) + , m_wrapped_types(std::move(wrapped_types)) { set_output_type(0, element::Type_t::dynamic, PartialShape::dynamic()); } @@ -45,30 +55,33 @@ namespace ngraph const Output& pattern_value, const Output& graph_value) override; - NodeTypeInfo get_wrapped_type() const { return m_wrapped_type; } + NodeTypeInfo get_wrapped_type() const; + + const std::vector& get_wrapped_types() const; + private: - NodeTypeInfo m_wrapped_type; + std::vector m_wrapped_types; }; } - template + template std::shared_ptr wrap_type(const OutputVector& inputs, const pattern::op::ValuePredicate& pred) { - static_assert(std::is_base_of::value, "Unexpected template type"); - return std::make_shared(T::type_info, pred, inputs); + std::vector info{Args::type_info...}; + return std::make_shared(info, pred, inputs); } - template + template std::shared_ptr wrap_type(const OutputVector& inputs = {}) { - return wrap_type(inputs, [](const Output& output) { return true; }); + return wrap_type(inputs, [](const Output& output) { return true; }); } - template + template std::shared_ptr wrap_type(const pattern::op::ValuePredicate& pred) { - return wrap_type({}, pred); + return wrap_type({}, pred); } } } diff --git a/ngraph/core/include/ngraph/runtime/tensor.hpp b/ngraph/core/include/ngraph/runtime/tensor.hpp index 8985957faab24d..9e83c3a3f61072 100644 --- a/ngraph/core/include/ngraph/runtime/tensor.hpp +++ b/ngraph/core/include/ngraph/runtime/tensor.hpp @@ -63,6 +63,7 @@ namespace ngraph /// \brief Get tensor's unique name /// \return tensor's name + NGRAPH_DEPRECATED("Only output ports have names") const std::string& get_name() const; /// \brief Get the stale value of the tensor. 
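// [Editorial sketch] The WrapType changes a few hunks above make wrap_type<T, Args...>()
// variadic, so a single pattern node can match several operation types. The matcher below is
// a made-up usage example (the op set and matcher name are illustrative only); the
// runtime::Tensor doc comment interrupted here continues right after this sketch.
#include <memory>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/pattern/matcher.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

std::shared_ptr<ngraph::pattern::Matcher> make_activation_matcher()
{
    // One pattern node covering Relu, Sigmoid and Tanh.
    auto activation = ngraph::pattern::wrap_type<ngraph::opset5::Relu,
                                                 ngraph::opset5::Sigmoid,
                                                 ngraph::opset5::Tanh>();
    return std::make_shared<ngraph::pattern::Matcher>(activation, "AnyActivation");
}
// Usage inside a transformation: if (matcher->match(node->output(0))) { ... }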
A tensor is stale if its data is diff --git a/ngraph/core/include/ngraph/validation_util.hpp b/ngraph/core/include/ngraph/validation_util.hpp index 58426d71a58eb3..66058b706deb3d 100644 --- a/ngraph/core/include/ngraph/validation_util.hpp +++ b/ngraph/core/include/ngraph/validation_util.hpp @@ -226,6 +226,70 @@ namespace ngraph std::map& output_tensor_map, const OutputVector& outputs); + /// \brief Evaluates lower value estimation of the output tensor. Traverses graph up to deduce + /// estimation through it. + /// \param Node output pointing to the tensor for estimation. + /// \return HostTensorPtr to estimated value if can be determined, or nullptr. + NGRAPH_API HostTensorPtr evaluate_lower_bound(const Output& output); + + /// \brief Evaluates lower value estimation of the output tensor. Traverses graph up to deduce + /// estimation through it. + /// \param output Tensor to be estimated. + /// \return HostTensorPtr to estimated value if can be determined, or nullptr. + NGRAPH_API HostTensorPtr evaluate_upper_bound(const Output& output); + + /// \brief Evaluates lower and upper value estimations of the output tensor. Traverses graph up + /// to deduce estimation through it. + /// \param output Node output pointing to the tensor for estimation. + /// \return pair with HostTensorPtrs for lower and upper value estimation. Each object in pair + /// could be HostTensorPtr to estimated value if particular bound can be determined, or nullptr. + NGRAPH_API std::pair + evaluate_both_bounds(const Output& output); + + /// \brief Evaluates lower and upper value estimations for the output tensor. Estimation would + /// be represented as partial shape object using Dimension(min, max) for each element. + /// \param output Node output pointing to the tensor for estimation. + /// \param pshape Resulting estimation would be stored in this PartialShape. + /// \return boolean status if value evaluation was successful. + NGRAPH_API bool evaluate_as_partial_shape(const Output& output, PartialShape& pshape); + + /// \brief Estimates upper bound for node output tensors using only upper bounds of the nodes + /// inputs. + /// \param node Operation to be performed + /// \param output_values Vector of HostTensorPtrs representing resulting upper value estimations + /// \return boolean status if value evaluation was successful. + NGRAPH_API bool default_upper_bound_evaluator(const Node* node, + const HostTensorVector& output_values); + /// \brief Estimates lower bound for node output tensors using only lower bounds of the nodes + /// inputs. + /// \param node Operation to be performed + /// \param output_values Vector of HostTensorPtrs representing resulting lower value estimations + /// \return boolean status if value evaluation was successful. + NGRAPH_API bool default_lower_bound_evaluator(const Node* node, + const HostTensorVector& output_values); + + NGRAPH_API bool interval_bound_evaluator(const Node* node, + const HostTensorVector& lower_output_values, + const HostTensorVector& upper_output_values); + + /// \brief Checks if all the elements of the bound HostTensor are positive + NGRAPH_API bool host_tensor_is_positive(const HostTensorPtr& bound); + + /// \brief Checks if lower and upper bounds of the corresponding tensor are set (not nullptr) + /// and pointers are the same. It doesn't check if lower and upper values are the same relying + /// only on pointers comparison. + NGRAPH_API bool has_and_set_equal_bounds(const Output& source); + + /// \brief Runs an estimation of source tensor. 
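// [Editorial sketch] The bound-evaluation helpers declared in validation_util.hpp above
// (evaluate_as_partial_shape, get_constant_from_source, ...) are typically consumed as shown
// below; the Reshape example and variable names are assumptions made for illustration, not
// code from this patch. The get_constant_from_source() description continues right after
// this sketch.
#include <memory>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/validation_util.hpp>

std::shared_ptr<ngraph::Node>
fold_target_shape(const std::shared_ptr<ngraph::opset5::Reshape>& reshape)
{
    // If the lower and upper bound estimations coincide, the 'target shape' input can be
    // replaced by a Constant.
    if (auto constant = ngraph::get_constant_from_source(reshape->input_value(1)))
        return constant;

    // Otherwise the bounds can still be queried as a PartialShape of per-dimension intervals.
    ngraph::PartialShape bounds;
    if (ngraph::evaluate_as_partial_shape(reshape->input_value(1), bounds))
    {
        // e.g. bounds might be {1, Dimension(1, 16), 224, 224}
    }
    return nullptr;
}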
If it succeeded to calculate both bounds and + /// they are the same returns Constant operation from the resulting bound, otherwise nullptr. + NGRAPH_API std::shared_ptr get_constant_from_source(const Output& source); + + /// \brief Returns a Constant storing scalar value equal to std::numeric_limits::max() + NGRAPH_API std::shared_ptr get_constant_max_of_type(element::Type_t t); + + /// \brief Returns a Constant storing scalar value equal to std::numeric_limits::min() + NGRAPH_API std::shared_ptr get_constant_min_of_type(element::Type_t t); + namespace opset1 { /// diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/convolution.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/convolution.hpp index d78c1c0d52287f..6a005d9c6e2635 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/convolution.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/convolution.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -24,339 +25,245 @@ #include "ngraph/axis_vector.hpp" #include "ngraph/coordinate_transform.hpp" #include "ngraph/runtime/reference/concat.hpp" +#include "ngraph/runtime/reference/helpers.hpp" #include "ngraph/runtime/reference/reverse.hpp" #include "ngraph/runtime/reference/split.hpp" #include "ngraph/util.hpp" +// can't be removed currently due to arm-plugin dependency +#include "ngraph/runtime/reference/convolution_backprop_data.hpp" namespace ngraph { namespace runtime { namespace reference { - template - struct widen - { - using type = T; - }; - - template <> - struct widen - { - using type = double; - }; - - template <> - struct widen - { - using type = long double; - }; - - // in: NC_I... - // filter: C_OC_I... - // out: NC_O... - template ::type> - void general_convolution(const INPUT* in, - const FILTER* filter, - OUTPUT* out, - const Shape& in_shape, - const Shape& filter_shape, - const Shape& out_shape, - const Strides& stride, - const Strides& filter_dilation, - const CoordinateDiff& in_pad_below, - const CoordinateDiff& in_pad_above, - const Strides& in_dilation, - size_t in_batch_axis, - size_t in_channel_axis, - size_t filter_out_channel_axis, - size_t filter_in_channel_axis, - size_t out_batch_axis, - size_t out_channel_axis) + namespace { - auto old_mode = std::fegetround(); - std::fesetround(FE_TONEAREST); - // Comments throughout assume without loss of generality that: - // - // * batch axes for both in and out are 0 - // * in channel axes for both in and filter are 1 - // * out channel axes for filter is 0 - // * out channel axis for out is 1 - - // At the outermost level we will walk over every out coordinate O. 
- CoordinateTransform out_transform(out_shape); - - for (const Coordinate& out_coord : out_transform) + constexpr size_t in_batch_axis = 0; + constexpr size_t in_channel_axis = 1; + constexpr size_t filter_out_ch_axis = 0; + constexpr size_t filter_in_ch_axis = 1; + constexpr size_t out_batch_axis = 0; + constexpr size_t out_channel_axis = 1; + constexpr size_t spatial_axis = 2; + + struct ConvolutionParams { - // Our out coordinate O will have the form: - // - // (N,chan_out,i_1,...,i_n) - - size_t batch_index = out_coord[out_batch_axis]; - size_t out_channel = out_coord[out_channel_axis]; - - // For the in we need to iterate the coordinate: - // - // I: - // - // over the range (noninclusive on the right): - // - // (N,0,s_1*i_1,s_2*i_2,...,s_n*i_n) -> - // - // (N+1, - // chans_in_count, - // s_1*i_1+ l_1*filter_dims_1, - /// ..., - /// s_n*i_n +l_n*filter_dims_n) - // - // with strides: - // - // (1,l_1,...,l_n). - // - // Note that we are iterating within the *padded* and *dilated* in batch, so - // further down we must check the current coordinate is in the pad or dilation - // gap. - - size_t n_spatial_dimensions = in_shape.size() - 2; - size_t n_in_channels = in_shape[in_channel_axis]; - - Coordinate in_transform_start(2 + n_spatial_dimensions); - Coordinate in_transform_end(2 + n_spatial_dimensions); - Strides in_transform_movement_strides(2 + n_spatial_dimensions, 1); - CoordinateDiff in_transform_pad_below(2 + n_spatial_dimensions, 0); - CoordinateDiff in_transform_pad_above(2 + n_spatial_dimensions, 0); - Strides in_transform_dilation_strides(2 + n_spatial_dimensions, 1); - - in_transform_start[in_batch_axis] = batch_index; - in_transform_end[in_batch_axis] = batch_index + 1; - in_transform_start[in_channel_axis] = 0; - in_transform_end[in_channel_axis] = 1; - - for (size_t i = 2; i < n_spatial_dimensions + 2; i++) - { - size_t filter_dilation_stride = filter_dilation[i - 2]; - size_t filter_movement_stride = stride[i - 2]; - std::ptrdiff_t below_pad = in_pad_below[i - 2]; - std::ptrdiff_t above_pad = in_pad_above[i - 2]; - size_t in_dilation_stride = in_dilation[i - 2]; - - in_transform_start[i] = filter_movement_stride * out_coord[i]; - in_transform_end[i] = in_transform_start[i] + - (filter_shape[i] - 1) * filter_dilation_stride + 1; - in_transform_movement_strides[i] = filter_dilation_stride; - in_transform_pad_below[i] = below_pad; - in_transform_pad_above[i] = above_pad; - in_transform_dilation_strides[i] = in_dilation_stride; - } + std::vector strides; + std::vector dilation; + std::vector pads_begin; + std::vector pads_end; + + ConvolutionParams(const Strides& strides_, + const Strides& dilation_, + const CoordinateDiff& pads_begin_, + const CoordinateDiff& pads_end_) + : strides{strides_.begin(), strides_.end()} + , dilation{dilation_.begin(), dilation_.end()} + , pads_begin{pads_begin_.begin(), pads_begin_.end()} + , pads_end{pads_end_.begin(), pads_end_.end()} {}; + }; + + template + constexpr inline bool in_range(Int val, std::pair range) noexcept + { + return val >= range.first && val < range.second; + } - AxisVector in_transform_axis_order(2 + n_spatial_dimensions); - for (size_t i = 0; i < in_transform_axis_order.size(); i++) + template + void convolve_3D_channels(const ConvolutionParams& p, + const T* batch, + const Shape& batch_shape, + const T* filter, + const Shape& filter_shape, + T*& out) + { + const int input_size_z = batch_shape[1]; + const int input_size_y = batch_shape[2]; + const int input_size_x = batch_shape[3]; + const int filter_size_z = 
filter_shape[1]; + const int filter_size_y = filter_shape[2]; + const int filter_size_x = filter_shape[3]; + const int dilated_filter_size_z = + filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1); + const int dilated_filter_size_y = + filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1); + const int dilated_filter_size_x = + filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1); + + const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end()); + const size_t input_channel_size = shape_size(input_channel_shape); + const Shape filter_channel_shape(++filter_shape.begin(), filter_shape.end()); + const size_t filter_channel_size = shape_size(filter_channel_shape); + + for (int i_z = -p.pads_begin[0]; + i_z <= (p.pads_end[0] + input_size_z - dilated_filter_size_z); + i_z += p.strides[0]) { - in_transform_axis_order[i] = i; + for (int i_y = -p.pads_begin[1]; + i_y <= (p.pads_end[1] + input_size_y - dilated_filter_size_y); + i_y += p.strides[1]) + { + for (int i_x = -p.pads_begin[2]; + i_x <= (p.pads_end[2] + input_size_x - dilated_filter_size_x); + i_x += p.strides[2]) + { + auto input_channel = batch; + auto filter_channel = filter; + T sum = 0; + size_t filter_channels_count = filter_shape[0]; + while (filter_channels_count--) + { + for (int f_z = 0; f_z < filter_size_z; ++f_z) + { + for (int f_y = 0; f_y < filter_size_y; ++f_y) + { + for (int f_x = 0; f_x < filter_size_x; ++f_x) + { + int rel_i_z = i_z + (f_z * p.dilation[0]); + int rel_i_y = i_y + (f_y * p.dilation[1]); + int rel_i_x = i_x + (f_x * p.dilation[2]); + + bool padding = + !(in_range(rel_i_x, {0, input_size_x}) && + in_range(rel_i_y, {0, input_size_y}) && + in_range(rel_i_z, {0, input_size_z})); + if (padding) + continue; + + int f_buf_idx = + (f_z * filter_size_y * filter_size_x) + + (f_y * filter_size_x) + f_x; + int i_buf_idx = + (rel_i_z * input_size_y * input_size_x) + + (rel_i_y * input_size_x) + rel_i_x; + sum += static_cast(input_channel[i_buf_idx]) * + static_cast(filter_channel[f_buf_idx]); + } + } + } + input_channel += input_channel_size; + filter_channel += filter_channel_size; + } + *out = sum; + ++out; + } + } } - CoordinateTransform in_transform(in_shape, - in_transform_start, - in_transform_end, - in_transform_movement_strides, - in_transform_axis_order, - in_transform_pad_below, - in_transform_pad_above, - in_transform_dilation_strides); - - // Simultaneously with iterating I, for the filter we need to iterate the - // coordinate: - // - // F - // - // over the range (noninclusive on the right): - // - // (chan_out,0,0,...,0) -> - // (chan_out+1, - // chans_in_count, - // filter_dims_1, - // ..., - // filter_dims_n) - // - // with unit stride. 
- - Shape filter_transform_start(2 + n_spatial_dimensions); - Shape filter_transform_end(2 + n_spatial_dimensions); - - filter_transform_start[filter_out_channel_axis] = out_channel; - filter_transform_end[filter_out_channel_axis] = out_channel + 1; - filter_transform_start[filter_in_channel_axis] = 0; - filter_transform_end[filter_in_channel_axis] = 1; + } - for (size_t i = 2; i < n_spatial_dimensions + 2; i++) + void extend_to_3D(ConvolutionParams& p, Shape& in_shape, Shape& filter_shape) + { + int spatial_rank = in_shape.size() - 2; + if (spatial_rank < 3) { - filter_transform_start[i] = 0; - filter_transform_end[i] = filter_shape[i]; + int missing_dims = 3 - spatial_rank; + p.dilation.insert( + std::prev(p.dilation.end(), spatial_rank), missing_dims, 1); + p.strides.insert(std::prev(p.strides.end(), spatial_rank), missing_dims, 1); + p.pads_begin.insert( + std::prev(p.pads_begin.end(), spatial_rank), missing_dims, 0); + p.pads_end.insert( + std::prev(p.pads_end.end(), spatial_rank), missing_dims, 0); + in_shape.insert(std::next(in_shape.end(), -spatial_rank), missing_dims, 1); + filter_shape.insert( + std::prev(filter_shape.end(), spatial_rank), missing_dims, 1); } + } + } - CoordinateTransform filter_transform( - filter_shape, filter_transform_start, filter_transform_end); - - // As we go, we sum up: - // - // out[O] += in[I] * filter[F]. + template + void convolution(const T* in, + const T* f, + T* out, + const Shape& in_shape, + const Shape& f_shape, + const Shape& out_shape, + const Strides& strides, + const Strides& dilation, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end) - ACCUMULATION result = 0; + { + // this implementation supports 1D, 2D and 3D convolutions + NGRAPH_CHECK(in_shape.size() >= 3 && in_shape.size() <= 5, + "Unsupported input rank: ", + in_shape); + + NGRAPH_CHECK(f_shape.size() >= 3 && f_shape.size() <= 5, + "Unsupported kernel rank: ", + f_shape); + + // here we are converting all param types to int's to avoid arithmetic issues + // (e.g signed + unsigned) in indexes calculation later + ConvolutionParams params{strides, dilation, pads_begin, pads_end}; + + // here we are extending spatial dimensions to 3D, because we are going to use 3D + // convolution implementation to convolve also in 1D & 2D case + Shape input_shape{in_shape}; + Shape filters_shape{f_shape}; + if (in_shape.size() < 5) + { + extend_to_3D(params, input_shape, filters_shape); + } - CoordinateTransform::Iterator in_it = in_transform.begin(); - CoordinateTransform::Iterator filter_it = filter_transform.begin(); - CoordinateTransform::Iterator in_it_end = in_transform.end(); - CoordinateTransform::Iterator filter_it_end = filter_transform.end(); + const size_t batches_count = input_shape[in_batch_axis]; + const Shape batch_shape(++input_shape.begin(), input_shape.end()); + const size_t batch_size = shape_size(batch_shape); - size_t in_channel_stride = row_major_strides(in_shape).at(in_channel_axis); - size_t filter_in_channel_stride = - row_major_strides(filter_shape).at(filter_in_channel_axis); + const size_t filters_count = filters_shape[filter_out_ch_axis]; + const Shape filter_shape(++filters_shape.begin(), filters_shape.end()); + const size_t filter_size = shape_size(filter_shape); - while (in_it != in_it_end && filter_it != filter_it_end) + auto batch = in; + for (size_t batch_idx = 0; batch_idx < batches_count; ++batch_idx) + { + auto filter = f; + for (size_t f_idx = 0; f_idx < filters_count; ++f_idx) { - const Coordinate& in_coord = *in_it; - if 
(in_transform.has_source_coordinate(in_coord)) - { - size_t in_idx = in_transform.index(in_coord); - const Coordinate& filter_coord = *filter_it; - size_t filter_idx = filter_transform.index(filter_coord); - for (size_t in_channel = 0; in_channel < n_in_channels; ++in_channel) - { - ACCUMULATION in_v = static_cast(in[in_idx]); - ACCUMULATION f_v = static_cast(filter[filter_idx]); - - result += in_v * f_v; - in_idx += in_channel_stride; - filter_idx += filter_in_channel_stride; - } - } - ++in_it; - ++filter_it; + convolve_3D_channels(params, batch, batch_shape, filter, filter_shape, out); + filter += filter_size; } - - out[out_transform.index(out_coord)] = result; + batch += batch_size; } - std::fesetround(old_mode); } + // DEPRECATED, can't be removed currently due to kmb-plugin dependency (#47799) template ::type> + typename ACCU = typename widen::type> void convolution(const INPUT* in, - const FILTER* filter, + const FILTER* f, OUTPUT* out, const Shape& in_shape, - const Shape& filter_shape, + const Shape& f_shape, const Shape& out_shape, - const Strides& stride, - const Strides& filter_dilation, - const CoordinateDiff& in_pad_below, - const CoordinateDiff& in_pad_above, - const Strides& in_dilation) + const Strides& strides, + const Strides& dilation, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end, + const Strides&) { - general_convolution(in, - filter, - out, - in_shape, - filter_shape, - out_shape, - stride, - filter_dilation, - in_pad_below, - in_pad_above, - in_dilation, - 0, - 1, - 0, - 1, - 0, - 1); + static_assert(std::is_same::value, + "input and filter types must be the same"); + static_assert(std::is_same::value, + "input and output types must be the same"); + + convolution(in, + f, + out, + in_shape, + f_shape, + out_shape, + strides, + dilation, + pads_begin, + pads_end); } - template ::type> - void convolution_backprop_in(const OUTPUT* delta_out, - const FILTER* filter, - INPUT* delta_in, - const Shape& out_shape, - const Shape& filter_shape, - const Shape& in_shape, - const Strides& in_dilation, - const Strides& filter_dilation, - const CoordinateDiff& forward_in_pad_bellow, - const CoordinateDiff& forward_in_pad_above, - const Strides& stride) - { - // Note that we only reverse the spatial dimensions here (loop - // starts at 2) - std::vector reversed(shape_size(filter_shape)); - AxisSet reverse_axes; - size_t reverse_axes_start = 2; - for (size_t i = reverse_axes_start; i < filter_shape.size(); ++i) - { - reverse_axes.insert(i); - } - reverse(reinterpret_cast(filter), - reinterpret_cast(&reversed[0]), - filter_shape, - filter_shape, - reverse_axes, - sizeof(FILTER)); - size_t filter_out_channel_axis = 1; - size_t filter_in_channel_axis = 0; - - // Compute backward pad out pad bellow - size_t spatial_dim_count = in_shape.size() - 2; - - CoordinateDiff backward_delta_out_pad_below; - backward_delta_out_pad_below.resize(spatial_dim_count); - - for (size_t i = 0; i < spatial_dim_count; i++) - { - backward_delta_out_pad_below[i] = - (static_cast(filter_shape[i + 2]) - 1) * filter_dilation[i] - - forward_in_pad_bellow[i]; - } - // Compute backward pad out pad above - CoordinateDiff backward_delta_out_pad_above; - backward_delta_out_pad_above.resize(spatial_dim_count); - - for (size_t i = 0; i < spatial_dim_count; i++) - { - backward_delta_out_pad_above[i] = - (static_cast(filter_shape[i + 2]) - 1) * filter_dilation[i] + - ((forward_in_pad_bellow[i] + ((in_shape[i + 2]) - 1) * in_dilation[i] + - forward_in_pad_above[i] - - (static_cast(filter_shape[i + 2]) 
- 1) * filter_dilation[i]) % - stride[i]) - - forward_in_pad_above[i]; - } - - general_convolution( - delta_out, - &reversed[0], - delta_in, - out_shape, - filter_shape, - in_shape, - in_dilation, - filter_dilation, - backward_delta_out_pad_below, - backward_delta_out_pad_above, - stride, - 0, - 1, - filter_out_channel_axis, - filter_in_channel_axis, - 0, - 1); - } } // namespace reference } // namespace runtime } // namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp new file mode 100644 index 00000000000000..37a35aeabd651d --- /dev/null +++ b/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp @@ -0,0 +1,309 @@ +//***************************************************************************** +// Copyright 2017-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include +#include +#include +#include + +#include "ngraph/axis_vector.hpp" +#include "ngraph/coordinate_transform.hpp" +#include "ngraph/runtime/reference/concat.hpp" +#include "ngraph/runtime/reference/helpers.hpp" +#include "ngraph/runtime/reference/reverse.hpp" +#include "ngraph/runtime/reference/split.hpp" +#include "ngraph/util.hpp" + +namespace ngraph +{ + namespace runtime + { + namespace reference + { + // in: NC_I... + // filter: C_OC_I... + // out: NC_O... + template ::type> + void convolution_backprop_impl(const INPUT* in, + const FILTER* filter, + OUTPUT* out, + const Shape& in_shape, + const Shape& filter_shape, + const Shape& out_shape, + const Strides& stride, + const Strides& filter_dilation, + const CoordinateDiff& in_pad_below, + const CoordinateDiff& in_pad_above, + const Strides& in_dilation, + size_t in_batch_axis, + size_t in_channel_axis, + size_t filter_out_channel_axis, + size_t filter_in_channel_axis, + size_t out_batch_axis, + size_t out_channel_axis) + { + auto old_mode = std::fegetround(); + std::fesetround(FE_TONEAREST); + // Comments throughout assume without loss of generality that: + // + // * batch axes for both in and out are 0 + // * in channel axes for both in and filter are 1 + // * out channel axes for filter is 0 + // * out channel axis for out is 1 + + // At the outermost level we will walk over every out coordinate O. 
+ CoordinateTransform out_transform(out_shape); + + for (const Coordinate& out_coord : out_transform) + { + // Our out coordinate O will have the form: + // + // (N,chan_out,i_1,...,i_n) + + size_t batch_index = out_coord[out_batch_axis]; + size_t out_channel = out_coord[out_channel_axis]; + + // For the in we need to iterate the coordinate: + // + // I: + // + // over the range (noninclusive on the right): + // + // (N,0,s_1*i_1,s_2*i_2,...,s_n*i_n) -> + // + // (N+1, + // chans_in_count, + // s_1*i_1+ l_1*filter_dims_1, + /// ..., + /// s_n*i_n +l_n*filter_dims_n) + // + // with strides: + // + // (1,l_1,...,l_n). + // + // Note that we are iterating within the *padded* and *dilated* in batch, so + // further down we must check the current coordinate is in the pad or dilation + // gap. + + size_t n_spatial_dimensions = in_shape.size() - 2; + size_t n_in_channels = in_shape[in_channel_axis]; + + Coordinate in_transform_start(2 + n_spatial_dimensions); + Coordinate in_transform_end(2 + n_spatial_dimensions); + Strides in_transform_movement_strides(2 + n_spatial_dimensions, 1); + CoordinateDiff in_transform_pad_below(2 + n_spatial_dimensions, 0); + CoordinateDiff in_transform_pad_above(2 + n_spatial_dimensions, 0); + Strides in_transform_dilation_strides(2 + n_spatial_dimensions, 1); + + in_transform_start[in_batch_axis] = batch_index; + in_transform_end[in_batch_axis] = batch_index + 1; + in_transform_start[in_channel_axis] = 0; + in_transform_end[in_channel_axis] = 1; + + for (size_t i = 2; i < n_spatial_dimensions + 2; i++) + { + size_t filter_dilation_stride = filter_dilation[i - 2]; + size_t filter_movement_stride = stride[i - 2]; + std::ptrdiff_t below_pad = in_pad_below[i - 2]; + std::ptrdiff_t above_pad = in_pad_above[i - 2]; + size_t in_dilation_stride = in_dilation[i - 2]; + + in_transform_start[i] = filter_movement_stride * out_coord[i]; + in_transform_end[i] = in_transform_start[i] + + (filter_shape[i] - 1) * filter_dilation_stride + 1; + in_transform_movement_strides[i] = filter_dilation_stride; + in_transform_pad_below[i] = below_pad; + in_transform_pad_above[i] = above_pad; + in_transform_dilation_strides[i] = in_dilation_stride; + } + + AxisVector in_transform_axis_order(2 + n_spatial_dimensions); + for (size_t i = 0; i < in_transform_axis_order.size(); i++) + { + in_transform_axis_order[i] = i; + } + CoordinateTransform in_transform(in_shape, + in_transform_start, + in_transform_end, + in_transform_movement_strides, + in_transform_axis_order, + in_transform_pad_below, + in_transform_pad_above, + in_transform_dilation_strides); + + // Simultaneously with iterating I, for the filter we need to iterate the + // coordinate: + // + // F + // + // over the range (noninclusive on the right): + // + // (chan_out,0,0,...,0) -> + // (chan_out+1, + // chans_in_count, + // filter_dims_1, + // ..., + // filter_dims_n) + // + // with unit stride. 
+ + Shape filter_transform_start(2 + n_spatial_dimensions); + Shape filter_transform_end(2 + n_spatial_dimensions); + + filter_transform_start[filter_out_channel_axis] = out_channel; + filter_transform_end[filter_out_channel_axis] = out_channel + 1; + filter_transform_start[filter_in_channel_axis] = 0; + filter_transform_end[filter_in_channel_axis] = 1; + + for (size_t i = 2; i < n_spatial_dimensions + 2; i++) + { + filter_transform_start[i] = 0; + filter_transform_end[i] = filter_shape[i]; + } + + CoordinateTransform filter_transform( + filter_shape, filter_transform_start, filter_transform_end); + + // As we go, we sum up: + // + // out[O] += in[I] * filter[F]. + + ACCUMULATION result = 0; + + CoordinateTransform::Iterator in_it = in_transform.begin(); + CoordinateTransform::Iterator filter_it = filter_transform.begin(); + CoordinateTransform::Iterator in_it_end = in_transform.end(); + CoordinateTransform::Iterator filter_it_end = filter_transform.end(); + + size_t in_channel_stride = row_major_strides(in_shape).at(in_channel_axis); + size_t filter_in_channel_stride = + row_major_strides(filter_shape).at(filter_in_channel_axis); + + while (in_it != in_it_end && filter_it != filter_it_end) + { + const Coordinate& in_coord = *in_it; + if (in_transform.has_source_coordinate(in_coord)) + { + size_t in_idx = in_transform.index(in_coord); + const Coordinate& filter_coord = *filter_it; + size_t filter_idx = filter_transform.index(filter_coord); + for (size_t in_channel = 0; in_channel < n_in_channels; ++in_channel) + { + ACCUMULATION in_v = static_cast(in[in_idx]); + ACCUMULATION f_v = static_cast(filter[filter_idx]); + + result += in_v * f_v; + in_idx += in_channel_stride; + filter_idx += filter_in_channel_stride; + } + } + ++in_it; + ++filter_it; + } + + out[out_transform.index(out_coord)] = result; + } + std::fesetround(old_mode); + } + + template ::type> + void convolution_backprop_in(const OUTPUT* delta_out, + const FILTER* filter, + INPUT* delta_in, + const Shape& out_shape, + const Shape& filter_shape, + const Shape& in_shape, + const Strides& in_dilation, + const Strides& filter_dilation, + const CoordinateDiff& forward_in_pad_bellow, + const CoordinateDiff& forward_in_pad_above, + const Strides& stride) + { + // Note that we only reverse the spatial dimensions here (loop + // starts at 2) + std::vector reversed(shape_size(filter_shape)); + AxisSet reverse_axes; + size_t reverse_axes_start = 2; + for (size_t i = reverse_axes_start; i < filter_shape.size(); ++i) + { + reverse_axes.insert(i); + } + reverse(reinterpret_cast(filter), + reinterpret_cast(&reversed[0]), + filter_shape, + filter_shape, + reverse_axes, + sizeof(FILTER)); + size_t filter_out_channel_axis = 1; + size_t filter_in_channel_axis = 0; + + // Compute backward pad out pad bellow + size_t spatial_dim_count = in_shape.size() - 2; + + CoordinateDiff backward_delta_out_pad_below; + backward_delta_out_pad_below.resize(spatial_dim_count); + + for (size_t i = 0; i < spatial_dim_count; i++) + { + backward_delta_out_pad_below[i] = + (static_cast(filter_shape[i + 2]) - 1) * filter_dilation[i] - + forward_in_pad_bellow[i]; + } + // Compute backward pad out pad above + CoordinateDiff backward_delta_out_pad_above; + backward_delta_out_pad_above.resize(spatial_dim_count); + + for (size_t i = 0; i < spatial_dim_count; i++) + { + backward_delta_out_pad_above[i] = + (static_cast(filter_shape[i + 2]) - 1) * filter_dilation[i] + + ((forward_in_pad_bellow[i] + ((in_shape[i + 2]) - 1) * in_dilation[i] + + forward_in_pad_above[i] - + 
(static_cast(filter_shape[i + 2]) - 1) * filter_dilation[i]) % + stride[i]) - + forward_in_pad_above[i]; + } + + convolution_backprop_impl( + delta_out, + &reversed[0], + delta_in, + out_shape, + filter_shape, + in_shape, + in_dilation, + filter_dilation, + backward_delta_out_pad_below, + backward_delta_out_pad_above, + stride, + 0, + 1, + filter_out_channel_axis, + filter_in_channel_axis, + 0, + 1); + } + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder.hpp index ce6d482df12567..dc683b132ed277 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder.hpp @@ -59,8 +59,7 @@ namespace ngraph auto data_index = data_transform.index({seq_ind, batch_ind, 0}); auto mask_index = seq_masks_transform.index({seq_ind, batch_ind}); - // first 0 marks the end of a sequence - if (seq_ind && sequence_masks[mask_index] == T{0}) + if (sequence_masks[mask_index] == T{0}) { break; } @@ -69,6 +68,7 @@ namespace ngraph auto class_max_element = std::max_element(class_index, class_index + class_count); unsigned int max_class_ind = std::distance(class_index, class_max_element); + if (!(previous_class_index == max_class_ind && ctc_merge_repeated) && max_class_ind < blank_index) { diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder_seq_len.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder_seq_len.hpp index bf55043ca9bfdd..267232af6f1775 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder_seq_len.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/ctc_greedy_decoder_seq_len.hpp @@ -54,14 +54,14 @@ namespace ngraph auto class_max_element = std::max_element(class_index, class_index + class_count); const auto max_class_ind = std::distance(class_index, class_max_element); - if (max_class_ind < blank_index[0] && + if (max_class_ind != blank_index[0] && !(ctc_merge_repeated && previous_class_index == max_class_ind)) { out1[out_index++] = max_class_ind; } previous_class_index = max_class_ind; } - out2[batch_ind] = seq_len; + out2[batch_ind] = out_index - batch_ind * seq_len_max; } } } // namespace reference diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/dot.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/dot.hpp index 7ce1a056af34e7..cfc212b50eb644 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/dot.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/dot.hpp @@ -21,8 +21,8 @@ #include #include -#include "convolution.hpp" #include "ngraph/coordinate_transform.hpp" +#include "ngraph/runtime/reference/helpers.hpp" #include "ngraph/shape_util.hpp" namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp index 3353b6299272e5..f9174f8f9c11e7 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp @@ -223,11 +223,11 @@ namespace ngraph out_high, i, out_high_offsets); - if (arg[i] <= in_low_val) + if (arg[i] <= std::min(in_low_val, in_high_val)) { out[i] = out_low_val; } - else if (arg[i] > in_high_val) + else if (arg[i] > 
std::max(in_low_val, in_high_val)) { out[i] = out_high_val; } diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/group_convolution.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/group_convolution.hpp new file mode 100644 index 00000000000000..60867568f13f8e --- /dev/null +++ b/ngraph/core/reference/include/ngraph/runtime/reference/group_convolution.hpp @@ -0,0 +1,113 @@ +//***************************************************************************** +// Copyright 2017-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include "ngraph/runtime/reference/convolution.hpp" +#include "ngraph/util.hpp" + +namespace +{ + constexpr size_t filter_group_axis = 0; + constexpr size_t in_batch_axis = 0; + constexpr size_t in_channel_axis = 1; + constexpr size_t out_batch_axis = 0; + constexpr size_t out_channel_axis = 1; +} + +namespace ngraph +{ + namespace runtime + { + namespace reference + { + template ::type> + void group_convolution(const INPUT* in, + const FILTER* f, + OUTPUT* out, + const Shape& in_shape, + const Shape& filter_shape, + const Shape& out_shape, + const Strides& strides, + const Strides& dilation, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end) + + { + NGRAPH_CHECK(filter_shape.size() >= 4 && filter_shape.size() <= 6, + "Unsupported kernel rank: ", + filter_shape); + + const size_t group_count = filter_shape[filter_group_axis]; + + const INPUT* group_batch = in; + const Shape group_batch_shape = [&]() { + Shape new_shape{in_shape}; + new_shape[in_batch_axis] = 1; + new_shape[in_channel_axis] /= group_count; + return new_shape; + }(); + const size_t group_batch_size = shape_size(group_batch_shape); + + const FILTER* group_filter = f; + const Shape group_filter_shape = [&]() { + Shape new_shape{++filter_shape.begin(), filter_shape.end()}; + return new_shape; + }(); + const size_t group_filter_size = shape_size(group_filter_shape); + + OUTPUT* group_out = out; + const Shape group_out_shape = [&]() { + Shape new_shape{out_shape}; + new_shape[out_batch_axis] = 1; + new_shape[out_channel_axis] /= group_count; + return new_shape; + }(); + const size_t group_out_size = shape_size(group_out_shape); + + // TODO: delete in_dilation when Convolution PR (#3922) is merged + // in_dilation parameter is needed only for old implementation (CoordinateTransform + // based) + Strides in_dilation(in_shape.size()); + std::fill(in_dilation.begin(), in_dilation.end(), 1); + for (size_t batch_idx = 0; batch_idx < in_shape[in_batch_axis]; ++batch_idx) + { + group_filter = f; + for (size_t group_idx = 0; group_idx < group_count; ++group_idx) + { + runtime::reference::convolution(group_batch, + group_filter, + group_out, + group_batch_shape, + group_filter_shape, + group_out_shape, + strides, + dilation, + pads_begin, + pads_end, + in_dilation); + group_batch += group_batch_size; + group_filter += group_filter_size; + group_out += 
group_out_size; + } + } + } + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/helpers.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/helpers.hpp new file mode 100644 index 00000000000000..b5430201a7705b --- /dev/null +++ b/ngraph/core/reference/include/ngraph/runtime/reference/helpers.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright 2017-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +namespace ngraph +{ + namespace runtime + { + namespace reference + { + template + struct widen + { + using type = T; + }; + + template <> + struct widen + { + using type = double; + }; + + template <> + struct widen + { + using type = long double; + }; + } + } +} diff --git a/ngraph/core/src/descriptor/tensor.cpp b/ngraph/core/src/descriptor/tensor.cpp index 9669e9e3b8d6f9..1588735e1e4763 100644 --- a/ngraph/core/src/descriptor/tensor.cpp +++ b/ngraph/core/src/descriptor/tensor.cpp @@ -16,6 +16,7 @@ #include "ngraph/descriptor/tensor.hpp" #include "ngraph/node.hpp" +#include "ngraph/runtime/host_tensor.hpp" using namespace ngraph; using namespace std; @@ -42,11 +43,6 @@ descriptor::Tensor::Tensor(const element::Type& element_type, { } -void descriptor::Tensor::set_name(const string& name) -{ - m_name = name; -} - void descriptor::Tensor::set_tensor_type(const element::Type& element_type, const PartialShape& pshape) { @@ -72,6 +68,26 @@ void descriptor::Tensor::set_partial_shape(const PartialShape& partial_shape) } } +void descriptor::Tensor::invalidate_values() +{ + m_upper_value = nullptr; + m_lower_value = nullptr; +} + +void descriptor::Tensor::set_lower_value(const HostTensorPtr& value) +{ + NGRAPH_CHECK(m_partial_shape.same_scheme(value->get_partial_shape())); + NGRAPH_CHECK(m_element_type == value->get_element_type()); + m_lower_value = value; +} + +void descriptor::Tensor::set_upper_value(const HostTensorPtr& value) +{ + NGRAPH_CHECK(m_partial_shape.same_scheme(value->get_partial_shape())); + NGRAPH_CHECK(m_element_type == value->get_element_type()); + m_upper_value = value; +} + const Shape& descriptor::Tensor::get_shape() const { if (m_partial_shape.is_static()) @@ -90,13 +106,41 @@ size_t descriptor::Tensor::size() const return shape_size(get_shape()) * m_element_type.size(); } +NGRAPH_SUPPRESS_DEPRECATED_START +void descriptor::Tensor::set_name(const string& name) +{ + m_name = name; +} + const std::string& descriptor::Tensor::get_name() const { return m_name; } +NGRAPH_SUPPRESS_DEPRECATED_END + +const std::unordered_set& descriptor::Tensor::get_names() const +{ + return m_names; +} + +void descriptor::Tensor::set_names(const std::unordered_set& names) +{ + m_names = names; +} ostream& operator<<(ostream& out, const descriptor::Tensor& tensor) { - out << "Tensor(" << tensor.get_name() << ")"; + 
std::string names; + for (const auto& name : tensor.get_names()) + { + if (!names.empty()) + names += ", "; + names += name; + } + NGRAPH_SUPPRESS_DEPRECATED_START + if (names.empty()) + names = tensor.get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END + out << "Tensor(" << names << ")"; return out; } diff --git a/ngraph/core/src/function.cpp b/ngraph/core/src/function.cpp index ea696fa30499b2..06f30f11b3493b 100644 --- a/ngraph/core/src/function.cpp +++ b/ngraph/core/src/function.cpp @@ -23,7 +23,6 @@ #include "ngraph/graph_util.hpp" #include "ngraph/log.hpp" #include "ngraph/op/util/op_types.hpp" -#include "ngraph/util.hpp" #include "ngraph/validation_util.hpp" using namespace std; @@ -42,7 +41,7 @@ Function::Function(const ResultVector& results, , m_unique_name("Function_" + to_string(m_next_instance_id.fetch_add(1))) , m_topological_sorter(topological_sort>>) { - validate_nodes_and_infer_types(); + check_all_parameters_registered(); } Function::Function(const OutputVector& results, @@ -54,7 +53,7 @@ Function::Function(const OutputVector& results, , m_unique_name("Function_" + to_string(m_next_instance_id.fetch_add(1))) , m_topological_sorter(topological_sort>>) { - validate_nodes_and_infer_types(); + check_all_parameters_registered(); } Function::Function(const NodeVector& results, @@ -66,7 +65,7 @@ Function::Function(const NodeVector& results, , m_unique_name("Function_" + to_string(m_next_instance_id.fetch_add(1))) , m_topological_sorter(topological_sort>>) { - validate_nodes_and_infer_types(); + check_all_parameters_registered(); } Function::Function(const std::shared_ptr& result, @@ -87,7 +86,7 @@ Function::Function(const ResultVector& results, , m_unique_name("Function_" + to_string(m_next_instance_id.fetch_add(1))) , m_topological_sorter(topological_sort>>) { - validate_nodes_and_infer_types(); + check_all_parameters_registered(); } Function::Function(const OutputVector& results, @@ -98,25 +97,38 @@ Function::Function(const OutputVector& results, { } +void Function::check_all_parameters_registered() const +{ + OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, + "Function::check_all_parameters_registered"); + std::stringstream unregistered_parameters; + for (auto& node : get_ordered_ops()) + { + if (op::is_parameter(node) && + std::find(m_parameters.begin(), m_parameters.end(), node) == m_parameters.end()) + unregistered_parameters << node << std::endl; + } + if (!unregistered_parameters.str().empty()) + throw ngraph_error("Function references undeclared parameters: " + + unregistered_parameters.str()); +} + void Function::validate_nodes_and_infer_types() const { OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, "Function::validate_nodes_and_infer_types"); + std::stringstream unregistered_parameters; for (auto& node : get_ordered_ops()) { node->revalidate_and_infer_types(); - - // If we find a parameter make sure it is in the list of parameters of the function - if (op::is_parameter(node)) - { - auto it = std::find(m_parameters.begin(), m_parameters.end(), node); - if (it == m_parameters.end()) - { - throw ngraph_error("Function references undeclared parameter"); - } - } + if (op::is_parameter(node) && + std::find(m_parameters.begin(), m_parameters.end(), node) == m_parameters.end()) + unregistered_parameters << node << std::endl; } + if (!unregistered_parameters.str().empty()) + throw ngraph_error("Function references undeclared parameters: " + + unregistered_parameters.str()); } std::vector> Function::get_ordered_ops() const @@ -405,4 +417,29 @@ void 
Function::remove_result(const std::shared_ptr& result) m_results.end()); } +void Function::add_parameters(const ParameterVector& params) +{ + for (int i = 0; i < params.size(); i++) + { + for (int j = 0; j < m_parameters.size(); j++) + { + NGRAPH_CHECK(params[i] != m_parameters[j], + "add_parameters(): Tried to add parameter (index in array ", + i, + ") but function already have the same parameter with index ", + j); + } + } + m_parameters.insert(m_parameters.end(), params.begin(), params.end()); +} + +void Function::remove_parameter(const std::shared_ptr& param) +{ + m_parameters.erase( + std::remove_if(m_parameters.begin(), + m_parameters.end(), + [¶m](std::shared_ptr& r) { return r == param; }), + m_parameters.end()); +} + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; diff --git a/ngraph/core/src/graph_util.cpp b/ngraph/core/src/graph_util.cpp index bee2c68c0e3b48..fc011de40a54ac 100644 --- a/ngraph/core/src/graph_util.cpp +++ b/ngraph/core/src/graph_util.cpp @@ -924,7 +924,9 @@ bool ngraph::replace_output_update_name(Output output, const Output& { replacement.get_node()->set_friendly_name(output.get_node()->get_friendly_name()); // Update output tensor name + NGRAPH_SUPPRESS_DEPRECATED_START replacement.get_tensor().set_name(output.get_node()->get_friendly_name()); + NGRAPH_SUPPRESS_DEPRECATED_END } output.replace(replacement); copy_runtime_info({replacement.get_node_shared_ptr(), output.get_node_shared_ptr()}, diff --git a/ngraph/core/src/node.cpp b/ngraph/core/src/node.cpp index b74ee51111e254..752ac9b98b9c83 100644 --- a/ngraph/core/src/node.cpp +++ b/ngraph/core/src/node.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include +#include #include #include #include @@ -143,6 +144,10 @@ std::shared_ptr { clone->add_control_dependency(cdep); } + for (size_t i = 0; i < get_output_size(); i++) + { + clone->get_output_tensor(i).set_names(get_output_tensor(i).get_names()); + } return clone; } @@ -242,6 +247,12 @@ void Node::set_output_size(size_t n) } } +void Node::invalidate_values() +{ + for (const auto& output : outputs()) + output.get_tensor().invalidate_values(); +} + void Node::validate_and_infer_types() { } @@ -658,13 +669,6 @@ descriptor::Tensor& Node::get_input_tensor(size_t i) const return input.get_tensor(); } -const string& Node::get_output_tensor_name(size_t i) const -{ - NGRAPH_CHECK( - i < m_outputs.size(), "index '", i, "' out of range in get_output_tensor_name(size_t i)"); - return m_outputs[i].get_tensor().get_name(); -} - size_t Node::get_input_size() const { return m_inputs.size(); @@ -690,6 +694,7 @@ const PartialShape& Node::get_input_partial_shape(size_t i) const return m_inputs[i].get_partial_shape(); } +NGRAPH_SUPPRESS_DEPRECATED_START const string& Node::get_input_tensor_name(size_t i) const { NGRAPH_CHECK( @@ -697,6 +702,14 @@ const string& Node::get_input_tensor_name(size_t i) const return m_inputs[i].get_tensor().get_name(); } +const string& Node::get_output_tensor_name(size_t i) const +{ + NGRAPH_CHECK( + i < m_outputs.size(), "index '", i, "' out of range in get_output_tensor_name(size_t i)"); + return m_outputs[i].get_tensor().get_name(); +} +NGRAPH_SUPPRESS_DEPRECATED_END + bool Node::has_same_type(std::shared_ptr node) const { if (get_output_size() != node->get_output_size()) @@ -950,6 +963,28 @@ bool Node::evaluate(const HostTensorVector& output_values, return false; } +bool Node::evaluate_lower(const HostTensorVector& output_values) const +{ + const auto& inputs = input_values(); + bool 
dyn_inputs = std::any_of(inputs.begin(), inputs.end(), [](const Output& output) { + return !output.get_tensor().has_and_set_bound(); + }); + if (dyn_inputs) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool Node::evaluate_upper(const HostTensorVector& output_values) const +{ + const auto& inputs = input_values(); + bool dyn_inputs = std::any_of(inputs.begin(), inputs.end(), [](const Output& output) { + return !output.get_tensor().has_and_set_bound(); + }); + if (dyn_inputs) + return false; + return default_upper_bound_evaluator(this, output_values); +} + bool Node::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_ITT_SCOPED_TASK(itt::domains::nGraph, "Node::constant_fold"); @@ -960,22 +995,23 @@ bool Node::constant_fold(OutputVector& output_values, const OutputVector& input_ } // If all the inputs are constants, try to evaluate the outputs + bool all_constants = + std::all_of(input_values.begin(), input_values.end(), [](const Output& input) { + return as_type_ptr(input.get_node_shared_ptr()); + }); + if (!all_constants) + return false; + HostTensorVector input_tensors; - for (auto input : input_values) + for (const auto& input : input_values) { - if (auto constant = as_type_ptr(input.get_node_shared_ptr())) - { - auto host_tensor = make_shared(constant); - input_tensors.push_back(host_tensor); - } - else - { - return false; - } + auto host_tensor = make_shared( + as_type_ptr(input.get_node_shared_ptr())); + input_tensors.push_back(host_tensor); } HostTensorVector output_tensors; OutputVector output_constants; - for (auto output : outputs()) + for (const auto& output : outputs()) { auto tensor = make_shared(output.get_element_type(), output.get_partial_shape()); diff --git a/ngraph/core/src/op/batch_to_space.cpp b/ngraph/core/src/op/batch_to_space.cpp index 2c87cc05c88ad3..06a1520fcee093 100644 --- a/ngraph/core/src/op/batch_to_space.cpp +++ b/ngraph/core/src/op/batch_to_space.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "itt.hpp" @@ -77,9 +78,11 @@ void op::v1::BatchToSpace::validate_and_infer_types() auto crops_begin = input_value(2); auto crops_end = input_value(3); - if (ngraph::op::is_constant(block.get_node_shared_ptr()) && - ngraph::op::is_constant(crops_begin.get_node_shared_ptr()) && - ngraph::op::is_constant(crops_end.get_node_shared_ptr()) && data_pshape.is_static()) + auto block_const = get_constant_from_source(block); + auto crops_begin_const = get_constant_from_source(crops_begin); + auto crops_end_const = get_constant_from_source(crops_end); + + if (block_const && crops_begin_const && crops_end_const && data_pshape.is_static()) { const auto& data_shape = data.get_shape(); @@ -90,14 +93,9 @@ void op::v1::BatchToSpace::validate_and_infer_types() data_shape.size(), ")"); - auto block_val = std::dynamic_pointer_cast(block.get_node_shared_ptr()) - ->cast_vector(); - auto crops_begin_val = - std::dynamic_pointer_cast(crops_begin.get_node_shared_ptr()) - ->cast_vector(); - auto crops_end_val = - std::dynamic_pointer_cast(crops_end.get_node_shared_ptr()) - ->cast_vector(); + auto block_val = block_const->cast_vector(); + auto crops_begin_val = crops_begin_const->cast_vector(); + auto crops_end_val = crops_end_const->cast_vector(); int64_t block_prod = 1; for (long val : block_val) diff --git a/ngraph/core/src/op/broadcast.cpp b/ngraph/core/src/op/broadcast.cpp index bf5c7f44b2a98a..8893c8229e8d9e 100644 --- a/ngraph/core/src/op/broadcast.cpp +++ b/ngraph/core/src/op/broadcast.cpp @@ 
-21,6 +21,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/partial_shape.hpp" +#include #include #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/broadcast.hpp" @@ -185,8 +186,7 @@ void op::v3::Broadcast::validate_and_infer_types() { auto arg_shape = get_input_partial_shape(0); - const auto shape_constant = - as_type_ptr(input_value(1).get_node_shared_ptr()); + const auto shape_constant = get_constant_from_source(input_value(1)); if (shape_constant) { auto target_shape = shape_constant->get_shape_val(); diff --git a/ngraph/core/src/op/concat.cpp b/ngraph/core/src/op/concat.cpp index 94f734c70364de..b85f8b2086b82d 100644 --- a/ngraph/core/src/op/concat.cpp +++ b/ngraph/core/src/op/concat.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" @@ -152,3 +153,12 @@ bool op::Concat::evaluate(const HostTensorVector& outputs, const HostTensorVecto auto concat_axis = get_axis() < 0 ? get_axis() + inputs[0]->get_shape().size() : get_axis(); return evaluate_concat(inputs, outputs[0], concat_axis); } +bool op::Concat::evaluate_lower(const HostTensorVector& output_values) const +{ + return default_lower_bound_evaluator(this, output_values); +} + +bool op::Concat::evaluate_upper(const HostTensorVector& output_values) const +{ + return default_upper_bound_evaluator(this, output_values); +} \ No newline at end of file diff --git a/ngraph/core/src/op/constant.cpp b/ngraph/core/src/op/constant.cpp index ef1439501e7a2c..d7fcfb9313753d 100644 --- a/ngraph/core/src/op/constant.cpp +++ b/ngraph/core/src/op/constant.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include "itt.hpp" #include "ngraph/log.hpp" @@ -58,7 +59,7 @@ op::Constant::Constant(const shared_ptr& tensor) } op::Constant::Constant(const element::Type& type, - Shape shape, + const Shape& shape, const std::vector& values) : Constant(type, shape) { @@ -647,6 +648,15 @@ bool op::v0::Constant::evaluate(const HostTensorVector& outputs, return true; } +bool op::v0::Constant::evaluate_lower(const HostTensorVector& outputs) const +{ + return evaluate(outputs, {}); +} +bool op::v0::Constant::evaluate_upper(const HostTensorVector& outputs) const +{ + return evaluate(outputs, {}); +} + // // We have to open up namespace blocks here to work around a problem with gcc: // diff --git a/ngraph/core/src/op/convert.cpp b/ngraph/core/src/op/convert.cpp index ac925de82775f2..ba58f70cb13de4 100644 --- a/ngraph/core/src/op/convert.cpp +++ b/ngraph/core/src/op/convert.cpp @@ -15,9 +15,12 @@ //***************************************************************************** #include +#include #include "itt.hpp" #include "ngraph/op/convert.hpp" +#include "ngraph/op/equal.hpp" +#include "ngraph/op/select.hpp" #include "ngraph/runtime/reference/convert.hpp" using namespace std; @@ -116,6 +119,41 @@ namespace convert } return rc; } + + bool evaluate_bound(const Node* node, const HostTensorVector& output_values, bool is_upper) + { + const auto& input = node->input_value(0); + if (const auto& value = is_upper ? 
input.get_tensor().get_upper_value() + : input.get_tensor().get_lower_value()) + { + // constants for dynamic values translation + auto input_maximum_value = get_constant_max_of_type(input.get_element_type()); + auto output_maximum_value = + get_constant_max_of_type(output_values[0]->get_element_type()); + if (input_maximum_value == nullptr || output_maximum_value == nullptr) + return false; + + bool status = node->evaluate(output_values, {value}); + + if (!status) + return status; + + // dynamic values translation + auto input_dynamic_mask = + std::make_shared(element::boolean, input.get_shape()); + status = op::v1::Equal().evaluate( + {input_dynamic_mask}, {value, std::make_shared(input_maximum_value)}); + if (!status) + return status; + status = op::v1::Select().evaluate(output_values, + {input_dynamic_mask, + std::make_shared(output_maximum_value), + output_values[0]}); + return status; + } + else + return false; + } } bool op::v0::Convert::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const @@ -123,3 +161,13 @@ bool op::v0::Convert::evaluate(const HostTensorVector& output_values, NGRAPH_OP_SCOPE(v0_Convert_evaluate); return convert::evaluate_convert(input_values[0], output_values[0]); } + +bool op::v0::Convert::evaluate_lower(const HostTensorVector& output_values) const +{ + return convert::evaluate_bound(this, output_values, false); +} + +bool op::v0::Convert::evaluate_upper(const HostTensorVector& output_values) const +{ + return convert::evaluate_bound(this, output_values, true); +} diff --git a/ngraph/core/src/op/convolution.cpp b/ngraph/core/src/op/convolution.cpp index d8309acdb36eaa..332d0f0b91e26a 100644 --- a/ngraph/core/src/op/convolution.cpp +++ b/ngraph/core/src/op/convolution.cpp @@ -221,7 +221,7 @@ bool op::v1::ConvolutionBackpropData::is_dynamic() const bool is_dynamic = Node::is_dynamic(); if (inputs().size() == 3 && !is_dynamic) { - return !is_type(input_value(2).get_node()); + return !has_and_set_equal_bounds(input_value(2)); } return is_dynamic; } @@ -242,7 +242,7 @@ const PartialShape op::v1::ConvolutionBackpropData::get_output_shape() const bool is_output_shape_present = inputs().size() == 3; if (is_output_shape_present) { - if (auto const_op = as_type(input_value(2).get_node())) + if (auto const_op = get_constant_from_source(input_value(2))) { shape = const_op->get_shape_val(); } diff --git a/ngraph/core/src/op/divide.cpp b/ngraph/core/src/op/divide.cpp index 38b7cb5a10da20..9a761f1d765fb2 100644 --- a/ngraph/core/src/op/divide.cpp +++ b/ngraph/core/src/op/divide.cpp @@ -16,8 +16,6 @@ #include "ngraph/op/divide.hpp" #include "itt.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/op/negative.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/divide.hpp" diff --git a/ngraph/core/src/op/embedding_segments_sum.cpp b/ngraph/core/src/op/embedding_segments_sum.cpp index f79db62e207ad8..fd4071b9ebdcdb 100644 --- a/ngraph/core/src/op/embedding_segments_sum.cpp +++ b/ngraph/core/src/op/embedding_segments_sum.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/embedding_segments_sum.hpp" +#include #include "itt.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/opsets/opset3.hpp" @@ -161,8 +162,7 @@ void op::v3::EmbeddingSegmentsSum::validate_and_infer_types() if (emb_table_shape.rank().is_static()) { result_shape = emb_table_shape; - if (auto num_segments_const = - as_type(this->get_input_node_ptr(NUM_SEGMENTS))) + if 
(const auto& num_segments_const = get_constant_from_source(input_value(NUM_SEGMENTS))) { result_shape[0] = num_segments_const->cast_vector()[0]; } diff --git a/ngraph/core/src/op/experimental_detectron_prior_grid_generator.cpp b/ngraph/core/src/op/experimental_detectron_prior_grid_generator.cpp index 74ea30374202f1..bf656e86ab231e 100644 --- a/ngraph/core/src/op/experimental_detectron_prior_grid_generator.cpp +++ b/ngraph/core/src/op/experimental_detectron_prior_grid_generator.cpp @@ -95,8 +95,9 @@ void op::v6::ExperimentalDetectronPriorGridGenerator::validate() const auto num_batches_featmap = featmap_shape[0]; const auto num_batches_im_data = im_data_shape[0]; + const auto batches_intersection = num_batches_featmap & num_batches_im_data; NODE_VALIDATION_CHECK(this, - num_batches_featmap.same_scheme(num_batches_im_data), + !batches_intersection.get_interval().empty(), "The first dimension of both 'feature_map' and 'im_data' must match. " "Feature_map: ", num_batches_featmap, diff --git a/ngraph/core/src/op/experimental_detectron_roi_feature.cpp b/ngraph/core/src/op/experimental_detectron_roi_feature.cpp index 037ae0bffba4f6..17c22e4514f073 100644 --- a/ngraph/core/src/op/experimental_detectron_roi_feature.cpp +++ b/ngraph/core/src/op/experimental_detectron_roi_feature.cpp @@ -70,8 +70,9 @@ void op::v6::ExperimentalDetectronROIFeatureExtractor::validate_and_infer_types( NODE_VALIDATION_CHECK( this, rois_shape.rank().get_length() == 2, "Input rois rank must be equal to 2."); + auto input_rois_last_dim_intersection_with_4 = rois_shape[1] & Dimension(4); NODE_VALIDATION_CHECK(this, - rois_shape[1].is_static() && rois_shape[1].get_length() == 4u, + !input_rois_last_dim_intersection_with_4.get_interval().empty(), "The last dimension of the 'input_rois' input must be equal to 4. " "Got: ", rois_shape[1]); @@ -95,9 +96,9 @@ void op::v6::ExperimentalDetectronROIFeatureExtractor::validate_and_infer_types( "Rank of each element of the pyramid must be equal to 4. Got: ", current_rank); + auto first_dim_intersection_with_1 = current_shape[0] & Dimension(1); NODE_VALIDATION_CHECK(this, - current_shape[0].is_static() && - current_shape[0].get_length() == 1u, + !first_dim_intersection_with_1.get_interval().empty(), "The first dimension of each pyramid element must be equal to 1. 
" "Got: ", current_shape[0]); diff --git a/ngraph/core/src/op/gather.cpp b/ngraph/core/src/op/gather.cpp index 2510711770f7eb..84a09e8573fc1c 100644 --- a/ngraph/core/src/op/gather.cpp +++ b/ngraph/core/src/op/gather.cpp @@ -23,7 +23,7 @@ #include "ngraph/runtime/reference/gather.hpp" #include "ngraph/shape.hpp" -#include +#include NGRAPH_SUPPRESS_DEPRECATED_START @@ -118,8 +118,7 @@ void op::v1::Gather::validate_and_infer_types() int64_t op::v1::Gather::get_axis() const { int64_t axis = AXIS_NOT_SET_VALUE; - auto axes_input_node = input_value(AXIS).get_node_shared_ptr(); - if (auto const_op = as_type_ptr(axes_input_node)) + if (const auto& const_op = get_constant_from_source(input_value(AXIS))) { axis = const_op->cast_vector()[0]; } @@ -319,6 +318,22 @@ bool op::v1::Gather::evaluate(const HostTensorVector& outputs, const HostTensorV return evaluate_gather(outputs, inputs); } +bool op::v1::Gather::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(INDICES).get_tensor().has_and_set_bound() || + !input_value(AXIS).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v1::Gather::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(INDICES).get_tensor().has_and_set_bound() || + !input_value(AXIS).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} + bool op::v1::Gather::constant_fold(OutputVector& output_values, const OutputVector& input_values) { // try the regular constant folding just for the Gather node diff --git a/ngraph/core/src/op/group_conv.cpp b/ngraph/core/src/op/group_conv.cpp index 79a66bc1f94fb8..d29ab97e632694 100644 --- a/ngraph/core/src/op/group_conv.cpp +++ b/ngraph/core/src/op/group_conv.cpp @@ -284,7 +284,7 @@ bool op::v1::GroupConvolutionBackpropData::is_dynamic() const bool is_dynamic = Node::is_dynamic(); if (inputs().size() == 3 && !is_dynamic) { - return !is_type(input_value(2).get_node()); + return !has_and_set_equal_bounds(input_value(2)); } return is_dynamic; } @@ -305,7 +305,7 @@ const PartialShape op::v1::GroupConvolutionBackpropData::get_convolution_output_ bool is_output_shape_present = inputs().size() == 3; if (is_output_shape_present) { - if (auto const_op = as_type(input_value(2).get_node())) + if (const auto& const_op = get_constant_from_source(input_value(2))) { shape = const_op->get_shape_val(); } diff --git a/ngraph/core/src/op/interpolate.cpp b/ngraph/core/src/op/interpolate.cpp index 3e5bb04f91b10a..fe3ce18d7bbad6 100644 --- a/ngraph/core/src/op/interpolate.cpp +++ b/ngraph/core/src/op/interpolate.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "itt.hpp" #include "ngraph/op/constant.hpp" @@ -67,7 +68,7 @@ void op::v0::Interpolate::validate_and_infer_types() } } - if (auto const_shape = as_type_ptr(input_value(1).get_node_shared_ptr())) + if (const auto& const_shape = get_constant_from_source(input_value(1))) { auto out_shape = const_shape->cast_vector(); size_t i = 0; @@ -166,8 +167,8 @@ std::vector op::v4::Interpolate::get_axes() const return default_value; } - auto axes_node = as_type_ptr(input_value(3).get_node_shared_ptr()); - NODE_VALIDATION_CHECK(this, axes_node, "Input 'axes' should be Constant."); + auto axes_node = get_constant_from_source(input_value(3)); + NODE_VALIDATION_CHECK(this, axes_node, "Input 'axes' should be Constant or foldable."); return axes_node->cast_vector(); } @@ -185,7 +186,7 @@ void 
op::v4::Interpolate::infer_using_scales(PartialShape& output_shape, if (padded_input_shape[axis].is_static()) { float padded_len = static_cast(padded_input_shape[axis].get_length()); - int64_t new_dim = static_cast(padded_len * scales[i] + epsilon); + int64_t new_dim = static_cast(padded_len * (scales[i] + epsilon)); output_shape[axis] = Dimension(new_dim); } ++i; @@ -259,7 +260,7 @@ void op::v4::Interpolate::validate_and_infer_types() set_output_type(0, get_input_element_type(0), output_shape); if (m_attrs.shape_calculation_mode == ShapeCalcMode::scales) { - if (auto const_scales = as_type_ptr(input_value(2).get_node_shared_ptr())) + if (const auto& const_scales = get_constant_from_source(input_value(2))) { auto scales = const_scales->cast_vector(); infer_using_scales(output_shape, axes, scales, padded_input_shape); @@ -267,7 +268,7 @@ void op::v4::Interpolate::validate_and_infer_types() } else { - if (auto const_shape = as_type_ptr(input_value(1).get_node_shared_ptr())) + if (const auto& const_shape = get_constant_from_source(input_value(1))) { auto sizes = const_shape->cast_vector(); infer_using_shapes(output_shape, axes, sizes); diff --git a/ngraph/core/src/op/loop.cpp b/ngraph/core/src/op/loop.cpp index abd3b3458b9fba..7f849e8f621f26 100644 --- a/ngraph/core/src/op/loop.cpp +++ b/ngraph/core/src/op/loop.cpp @@ -72,8 +72,7 @@ void op::v5::Loop::validate_and_infer_types() loop_condition_rank.compatible(0), "Rank of ExecutionCondition input must be equal to 0 or 1"); } - if (const auto& cond_value = std::dynamic_pointer_cast( - loop_execution_condition.get_node_shared_ptr())) + if (const auto& cond_value = get_constant_from_source(loop_execution_condition)) { auto val = cond_value->cast_vector(); NODE_VALIDATION_CHECK(this, @@ -101,8 +100,7 @@ void op::v5::Loop::validate_and_infer_types() body_condition_rank.compatible(1), "Rank of BodyExecutionCondition output must be equal to 0 or 1"); } - if (const auto& cond_value = std::dynamic_pointer_cast( - body_execution_condition.get_node_shared_ptr())) + if (const auto& cond_value = get_constant_from_source(body_execution_condition)) { auto val = cond_value->cast_vector(); NODE_VALIDATION_CHECK(this, @@ -127,8 +125,7 @@ void op::v5::Loop::validate_and_infer_types() if (m_body->get_parameters().at(desc->m_body_parameter_index) == cond_param) { if (const auto& cond_value = - std::dynamic_pointer_cast( - input_value(desc->m_input_index).get_node_shared_ptr())) + get_constant_from_source(input_value(desc->m_input_index))) { auto val = cond_value->cast_vector(); NODE_VALIDATION_CHECK( @@ -157,8 +154,7 @@ void op::v5::Loop::validate_and_infer_types() trip_count_rank.compatible(1) || trip_count_rank.compatible(0), "Rank of TripCount input must be equal to 0 or 1"); } - if (const auto& trip_count_val = std::dynamic_pointer_cast( - trip_count.get_node_shared_ptr())) + if (const auto& trip_count_val = get_constant_from_source(trip_count)) { auto val = trip_count_val->cast_vector(); NODE_VALIDATION_CHECK(this, @@ -237,9 +233,6 @@ void op::v5::Loop::validate_and_infer_types() auto body_param_partial_shape = body_parameter->get_partial_shape(); auto input_partial_shape = input(index).get_partial_shape(); - NODE_VALIDATION_CHECK(this, - input_partial_shape.compatible(body_param_partial_shape), - "Iterator initial value is not compatible with body param"); body_parameter->set_partial_shape(input_partial_shape); } @@ -318,94 +311,15 @@ void op::v5::Loop::validate_and_infer_types() std::shared_ptr op::v5::Loop::clone_with_new_inputs(const OutputVector& 
new_args) const { NGRAPH_OP_SCOPE(v5_Loop_clone_with_new_inputs); - // WA: input description with index 0 or 1 means that Loop consructor will duplicate it in - // the inputs. - // When using visit_attributes() no duplication occurs, input_offset shall be decremented. - size_t input_offset = 2; - for (const auto& in_desc : m_input_descriptions) - { - if (in_desc->m_input_index == 0 || in_desc->m_input_index == 1) - { - input_offset--; - } - } - // input_offset < 0 means that there are several duplications of external_port_id - // (the same ext_port_id is connected to several Parameters in the port map) in input_desc, - // this can lead to wrong or undefined behavior, so throw exception here. Ticket: 47302 - NODE_VALIDATION_CHECK(this, input_offset >= 0, "External port id 0 or 1 is duplicated."); - // 0 - trip_count, 1 - execution condition, these inputs are not connected to the body - // params - OutputVector body_params_args(new_args.begin() + input_offset, new_args.end()); - auto op = make_shared(new_args[0], new_args[1]); - for (int idx = 2; idx < new_args.size(); ++idx) - { - op->set_argument(idx, new_args[idx]); - } + check_new_args_count(this, new_args); + auto op = make_shared(); NGRAPH_CHECK(op.get(), op != nullptr, "Cannot clone ", description(), " operation with name ", get_friendly_name()); - op->set_output_size(m_output_descriptions.size()); - - std::vector<::ngraph::element::Type> types(m_body->get_parameters().size()); - std::vector<::ngraph::PartialShape> new_shapes(m_body->get_parameters().size()); - - for (size_t input_index = 0; input_index < new_args.size(); ++input_index) - { - for (auto& input_description : m_input_descriptions) - { - if (input_description->m_input_index == input_index) - { - types[input_description->m_body_parameter_index] = - new_args[input_index].get_element_type(); - new_shapes[input_description->m_body_parameter_index] = - new_args[input_index].get_partial_shape(); - - if (new_shapes[input_description->m_body_parameter_index].is_static()) - { - if (auto slice_in = ::ngraph::as_type_ptr< - ngraph::op::v0::TensorIterator::SliceInputDescription>( - input_description)) - { - new_shapes[slice_in->m_body_parameter_index][slice_in->m_axis] = - slice_in->m_part_size; - } - } - } - } - } - - if (m_special_body_ports.current_iteration_input_idx >= 0) - { - const auto& cur_iterations_param = - m_body->get_parameters().at(m_special_body_ports.current_iteration_input_idx); - body_params_args.insert(body_params_args.begin() + - m_special_body_ports.current_iteration_input_idx, - cur_iterations_param); - new_shapes.at(m_special_body_ports.current_iteration_input_idx) = - cur_iterations_param->get_partial_shape(); - types.at(m_special_body_ports.current_iteration_input_idx) = - cur_iterations_param->get_element_type(); - } - op->m_num_iterations = m_num_iterations; - op->m_special_body_ports = m_special_body_ports; - auto func = std::make_shared( - m_body->get_results(), m_body->get_sinks(), m_body->get_parameters()); - auto spec_func = specialize_function( - func, types, new_shapes, std::vector(body_params_args.size(), nullptr)); - op->m_body = std::make_shared( - spec_func->get_results(), spec_func->get_sinks(), spec_func->get_parameters()); - - for (auto& input_description : m_input_descriptions) - { - op->m_input_descriptions.push_back(input_description->copy()); - } - for (auto& output_description : m_output_descriptions) - { - op->m_output_descriptions.push_back(output_description->copy()); - } + clone_to(*op, new_args); return op; } @@ -431,6 +345,31 @@ 
bool op::v5::Loop::evaluate(const HostTensorVector& outputs, const HostTensorVec return true; } +void op::v5::Loop::clone_to(op::v5::Loop& dst, const OutputVector& new_args) const +{ + dst.set_arguments(new_args); + dst.set_output_size(m_output_descriptions.size()); + + dst.m_num_iterations = m_num_iterations; + dst.m_special_body_ports = m_special_body_ports; + + dst.m_body = clone_function(*get_function()); + + for (auto& input_description : m_input_descriptions) + { + dst.m_input_descriptions.push_back(input_description->copy()); + } + for (auto& output_description : m_output_descriptions) + { + dst.m_output_descriptions.push_back(output_description->copy()); + } +} + +op::v5::Loop::Loop(const op::v5::Loop& other) +{ + other.clone_to(*this, other.input_values()); +} + namespace ngraph { constexpr DiscreteTypeInfo AttributeAdapter::type_info; diff --git a/ngraph/core/src/op/lrn.cpp b/ngraph/core/src/op/lrn.cpp index d0382f63735d15..739c28148722bd 100644 --- a/ngraph/core/src/op/lrn.cpp +++ b/ngraph/core/src/op/lrn.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/lrn.hpp" +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/op/constant.hpp" @@ -50,10 +51,8 @@ AxisSet op::LRN::get_reduction_axes() const { AxisSet axes{1}; // channel axis as default auto axes_input_node = input_value(1).get_node_shared_ptr(); - if (auto const_op = as_type_ptr(axes_input_node)) - { + if (const auto& const_op = get_constant_from_source(axes_input_node)) axes = const_op->get_axis_set_val(); - } return axes; } diff --git a/ngraph/core/src/op/min.cpp b/ngraph/core/src/op/min.cpp index fb4609dbb05923..d11be3a666fb54 100644 --- a/ngraph/core/src/op/min.cpp +++ b/ngraph/core/src/op/min.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/min.hpp" +#include #include "itt.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/runtime/host_tensor.hpp" @@ -83,3 +84,17 @@ bool op::v1::ReduceMin::evaluate(const HostTensorVector& outputs, NGRAPH_OP_SCOPE(v1_ReduceMin_evaluate); return minop::evaluate_min(inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); } + +bool op::v1::ReduceMin::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v1::ReduceMin::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} \ No newline at end of file diff --git a/ngraph/core/src/op/mod.cpp b/ngraph/core/src/op/mod.cpp index d350330bd96d5f..682a1b934b3fc6 100644 --- a/ngraph/core/src/op/mod.cpp +++ b/ngraph/core/src/op/mod.cpp @@ -43,6 +43,7 @@ op::v1::Mod::Mod(const Output& A, : FusedOp({A, B}) , m_auto_broadcast(auto_broadcast) { + constructor_validate_and_infer_types(); } bool ngraph::op::v1::Mod::visit_attributes(AttributeVisitor& visitor) diff --git a/ngraph/core/src/op/non_max_suppression.cpp b/ngraph/core/src/op/non_max_suppression.cpp index 15e8f8bbe1ed05..dfeb4c07c12ab0 100644 --- a/ngraph/core/src/op/non_max_suppression.cpp +++ b/ngraph/core/src/op/non_max_suppression.cpp @@ -16,6 +16,7 @@ #include "ngraph/op/non_max_suppression.hpp" #include +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/op/constant.hpp" @@ 
-178,12 +179,12 @@ void op::v1::NonMaxSuppression::validate_and_infer_types() "The last dimension of the 'boxes' input must be equal to 4. Got:", boxes_ps[2]); - const auto max_output_boxes_per_class = input_value(2).get_node_shared_ptr(); - if (num_boxes_boxes.is_static() && scores_ps[1].is_static() && - op::is_constant(max_output_boxes_per_class)) + const auto& max_output_boxes_input = get_constant_from_source(input_value(2)); + if (num_boxes_boxes.is_static() && scores_ps[1].is_static() && max_output_boxes_input) { const auto num_boxes = num_boxes_boxes.get_length(); - const auto max_output_boxes_per_class = max_boxes_output_from_input(); + const auto max_output_boxes_per_class = + max_output_boxes_input->cast_vector().at(0); const auto num_classes = scores_ps[1].get_length(); out_shape[0] = std::min(num_boxes, max_output_boxes_per_class * num_classes); @@ -195,8 +196,7 @@ int64_t op::v1::NonMaxSuppression::max_boxes_output_from_input() const { int64_t max_output_boxes{0}; - const auto max_output_boxes_input = - as_type_ptr(input_value(2).get_node_shared_ptr()); + const auto max_output_boxes_input = get_constant_from_source(input_value(2)); max_output_boxes = max_output_boxes_input->cast_vector().at(0); return max_output_boxes; @@ -395,13 +395,13 @@ void op::v3::NonMaxSuppression::validate_and_infer_types() if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { const auto num_boxes_boxes = boxes_ps[1]; - const auto max_output_boxes_per_class_node = input_value(2).get_node_shared_ptr(); - if (num_boxes_boxes.is_static() && scores_ps[1].is_static() && - op::is_constant(max_output_boxes_per_class_node)) + const auto max_output_boxes_input = get_constant_from_source(input_value(2)); + if (num_boxes_boxes.is_static() && scores_ps[1].is_static() && max_output_boxes_input) { const auto num_boxes = num_boxes_boxes.get_length(); const auto num_classes = scores_ps[1].get_length(); - const auto max_output_boxes_per_class = max_boxes_output_from_input(); + const auto max_output_boxes_per_class = + max_output_boxes_input->cast_vector().at(0); out_shape[0] = std::min(num_boxes, max_output_boxes_per_class * num_classes); } @@ -413,8 +413,7 @@ int64_t op::v3::NonMaxSuppression::max_boxes_output_from_input() const { int64_t max_output_boxes{0}; - const auto max_output_boxes_input = - as_type_ptr(input_value(2).get_node_shared_ptr()); + const auto max_output_boxes_input = get_constant_from_source(input_value(2)); max_output_boxes = max_output_boxes_input->cast_vector().at(0); return max_output_boxes; @@ -530,13 +529,14 @@ void op::v4::NonMaxSuppression::validate_and_infer_types() if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { const auto num_boxes_boxes = boxes_ps[1]; - const auto max_output_boxes_per_class_node = input_value(2).get_node_shared_ptr(); + const auto max_output_boxes_input = get_constant_from_source(input_value(2)); if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static() && - op::is_constant(max_output_boxes_per_class_node)) + max_output_boxes_input) { const auto num_boxes = num_boxes_boxes.get_length(); const auto num_classes = scores_ps[1].get_length(); - const auto max_output_boxes_per_class = max_boxes_output_from_input(); + const auto max_output_boxes_per_class = + max_output_boxes_input->cast_vector().at(0); out_shape[0] = std::min(num_boxes, max_output_boxes_per_class) * num_classes * scores_ps[0].get_length(); @@ -838,7 +838,7 @@ int64_t op::v5::NonMaxSuppression::max_boxes_output_from_input() const } const auto 
max_output_boxes_input = - as_type_ptr(input_value(max_output_boxes_port).get_node_shared_ptr()); + get_constant_from_source(input_value(max_output_boxes_port)); max_output_boxes = max_output_boxes_input->cast_vector().at(0); return max_output_boxes; @@ -853,8 +853,7 @@ float op::v5::NonMaxSuppression::iou_threshold_from_input() const return iou_threshold; } - const auto iou_threshold_input = - as_type_ptr(input_value(iou_threshold_port).get_node_shared_ptr()); + const auto iou_threshold_input = get_constant_from_source(input_value(iou_threshold_port)); iou_threshold = iou_threshold_input->cast_vector().at(0); return iou_threshold; @@ -869,8 +868,7 @@ float op::v5::NonMaxSuppression::score_threshold_from_input() const return score_threshold; } - const auto score_threshold_input = - as_type_ptr(input_value(score_threshold_port).get_node_shared_ptr()); + const auto score_threshold_input = get_constant_from_source(input_value(score_threshold_port)); score_threshold = score_threshold_input->cast_vector().at(0); return score_threshold; @@ -885,8 +883,7 @@ float op::v5::NonMaxSuppression::soft_nms_sigma_from_input() const return soft_nms_sigma; } - const auto soft_nms_sigma_input = - as_type_ptr(input_value(soft_nms_sigma_port).get_node_shared_ptr()); + const auto soft_nms_sigma_input = get_constant_from_source(input_value(soft_nms_sigma_port)); soft_nms_sigma = soft_nms_sigma_input->cast_vector().at(0); return soft_nms_sigma; @@ -927,9 +924,8 @@ void op::v5::NonMaxSuppression::validate_and_infer_types() if (boxes_ps.rank().is_static() && scores_ps.rank().is_static() && get_input_size() > 2) { const auto num_boxes_boxes = boxes_ps[1]; - const auto max_output_boxes_per_class_node = input_value(2).get_node_shared_ptr(); if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static() && - op::is_constant(max_output_boxes_per_class_node)) + has_and_set_equal_bounds(input_value(2))) { const auto num_boxes = num_boxes_boxes.get_length(); const auto num_classes = scores_ps[1].get_length(); diff --git a/ngraph/core/src/op/non_zero.cpp b/ngraph/core/src/op/non_zero.cpp index c5d2647ac82390..1821a0b79bab0c 100644 --- a/ngraph/core/src/op/non_zero.cpp +++ b/ngraph/core/src/op/non_zero.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/non_zero.hpp" +#include #include "itt.hpp" #include "ngraph/op/op.hpp" #include "ngraph/runtime/host_tensor.hpp" @@ -78,6 +79,17 @@ void op::v3::NonZero::validate_and_infer_types() } set_input_is_relevant_to_shape(0); + + if (const auto& input_constant = get_constant_from_source(input_value(0))) + { // input_value is available to calculate output shape + const auto& input_data = std::make_shared(input_constant); + auto output = std::make_shared(m_output_type, get_output_partial_shape(0)); + if (!evaluate({output}, {input_data})) + return; + set_output_type(0, m_output_type, output->get_partial_shape()); + get_output_tensor(0).set_lower_value(output); + get_output_tensor(0).set_upper_value(output); + } } shared_ptr op::v3::NonZero::clone_with_new_inputs(const OutputVector& new_args) const diff --git a/ngraph/core/src/op/normalize_l2.cpp b/ngraph/core/src/op/normalize_l2.cpp index 1804cab089b615..ed21e2d7c3a0a4 100644 --- a/ngraph/core/src/op/normalize_l2.cpp +++ b/ngraph/core/src/op/normalize_l2.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include #include +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" @@ 
-67,7 +68,8 @@ void op::NormalizeL2::pre_validate_and_infer_types() const auto& input_rank = input_pshape.rank(); const auto& axes_rank = axes_pshape.rank(); - NODE_VALIDATION_CHECK(this, op::is_constant(axes_node), "Input axes must be Constant type"); + NODE_VALIDATION_CHECK( + this, has_and_set_equal_bounds(input_value(1)), "Input axes must be Constant type"); if (axes_rank.is_static()) { @@ -99,8 +101,7 @@ void op::NormalizeL2::pre_validate_and_infer_types() AxisSet op::NormalizeL2::get_reduction_axes() const { AxisSet axes; - auto axes_input_node = input_value(1).get_node_shared_ptr(); - if (auto const_op = as_type_ptr(axes_input_node)) + if (auto const_op = get_constant_from_source(input_value(1))) { axes = const_op->get_axis_set_val(); } diff --git a/ngraph/core/src/op/one_hot.cpp b/ngraph/core/src/op/one_hot.cpp index 4a50ae32da9fa4..eddbe8ce2965d0 100644 --- a/ngraph/core/src/op/one_hot.cpp +++ b/ngraph/core/src/op/one_hot.cpp @@ -74,18 +74,13 @@ void op::v1::OneHot::validate_and_infer_types() off_value_shape.is_dynamic() || is_scalar(off_value_shape.to_shape()), "off_value input must be scalar."); - const auto& depth = input_value(1).get_node_shared_ptr(); PartialShape result_shape{PartialShape::dynamic()}; - - if (indices_shape.is_static() && indices_shape.rank().is_static() && op::is_constant(depth)) + const auto& depth = input_value(1).get_node_shared_ptr(); + const auto& depth_constant = get_constant_from_source(input_value(1)); + if (indices_shape.rank().is_static() && depth_constant) { + std::vector out_dims{indices_shape}; const auto indices_rank = indices_shape.rank().get_length(); - - std::vector out_dims(indices_rank); - for (auto i = 0; i < indices_rank; i++) - { - out_dims[i] = indices_shape[i]; - } m_axis = ngraph::normalize_axis(this, m_axis, indices_rank + 1, -indices_rank - 1, indices_rank); @@ -103,9 +98,7 @@ void op::v1::OneHot::validate_and_infer_types() depth->get_shape(), " elements)."); - const auto depth_constant = as_type_ptr(depth); int64_t depth_val = depth_constant->cast_vector()[0]; - NODE_VALIDATION_CHECK(this, depth_val > 0, "The value of 'depth' must be a positive number.", diff --git a/ngraph/core/src/op/pad.cpp b/ngraph/core/src/op/pad.cpp index 9354ec9d936730..c23dcdc029b451 100644 --- a/ngraph/core/src/op/pad.cpp +++ b/ngraph/core/src/op/pad.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/pad.hpp" +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/except.hpp" @@ -53,9 +54,8 @@ op::v1::Pad::Pad(const Output& arg, CoordinateDiff op::v1::Pad::get_pads_begin() const { - auto pads_begin_node = input_value(1).get_node_shared_ptr(); CoordinateDiff pads_begin_coord{}; - if (auto pads_begin_const = as_type_ptr(pads_begin_node)) + if (auto pads_begin_const = get_constant_from_source(input_value(1))) { pads_begin_coord = pads_begin_const->cast_vector(); } @@ -64,9 +64,8 @@ CoordinateDiff op::v1::Pad::get_pads_begin() const CoordinateDiff op::v1::Pad::get_pads_end() const { - auto pads_end_node = input_value(2).get_node_shared_ptr(); CoordinateDiff pads_end_coord{}; - if (auto pads_end_const = as_type_ptr(pads_end_node)) + if (auto pads_end_const = get_constant_from_source(input_value(2))) { pads_end_coord = pads_end_const->cast_vector(); } @@ -161,10 +160,7 @@ void op::v1::Pad::validate_and_infer_types() const auto& pads_begin_coord = get_pads_begin(); const auto& pads_end_coord = get_pads_end(); - auto pads_begin_node = 
input_value(1).get_node_shared_ptr(); - auto pads_end_node = input_value(2).get_node_shared_ptr(); - if (arg_shape_rank.is_static() && op::is_constant(pads_begin_node) && - op::is_constant(pads_end_node)) + if (arg_shape_rank.is_static() && !pads_begin_coord.empty() && !pads_end_coord.empty()) { const auto implied_rank = pads_begin_coord.size(); std::vector result_dims(implied_rank, Dimension::dynamic()); diff --git a/ngraph/core/src/op/prior_box.cpp b/ngraph/core/src/op/prior_box.cpp index 982ad24b8c18cf..7738b65dd84120 100644 --- a/ngraph/core/src/op/prior_box.cpp +++ b/ngraph/core/src/op/prior_box.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** +#include #include "itt.hpp" #include "ngraph/op/constant.hpp" @@ -63,7 +64,7 @@ void op::PriorBox::validate_and_infer_types() set_input_is_relevant_to_shape(0); - if (auto const_shape = as_type_ptr(input_value(0).get_node_shared_ptr())) + if (auto const_shape = get_constant_from_source(input_value(0))) { NODE_VALIDATION_CHECK(this, shape_size(const_shape->get_shape()) == 2, diff --git a/ngraph/core/src/op/prior_box_clustered.cpp b/ngraph/core/src/op/prior_box_clustered.cpp index 12f3c26aad1eaf..5352f8005ce492 100644 --- a/ngraph/core/src/op/prior_box_clustered.cpp +++ b/ngraph/core/src/op/prior_box_clustered.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** +#include #include "itt.hpp" #include "ngraph/op/constant.hpp" @@ -70,7 +71,7 @@ void op::PriorBoxClustered::validate_and_infer_types() set_input_is_relevant_to_shape(0); - if (auto const_shape = as_type_ptr(input_value(0).get_node_shared_ptr())) + if (auto const_shape = get_constant_from_source(input_value(0).get_node_shared_ptr())) { NODE_VALIDATION_CHECK(this, shape_size(const_shape->get_shape()) == 2, diff --git a/ngraph/core/src/op/range.cpp b/ngraph/core/src/op/range.cpp index d89d1c72c6e6a9..b3045dc92bba5e 100644 --- a/ngraph/core/src/op/range.cpp +++ b/ngraph/core/src/op/range.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include +#include #include "itt.hpp" #include "ngraph/op/constant.hpp" @@ -110,9 +111,9 @@ void op::v4::Range::validate_and_infer_types() "'step' input scalar should be a numeric type. 
Got: ", get_input_element_type(2)); - auto const_start = as_type_ptr(this->input_value(0).get_node_shared_ptr()); - auto const_stop = as_type_ptr(this->input_value(1).get_node_shared_ptr()); - auto const_step = as_type_ptr(this->input_value(2).get_node_shared_ptr()); + auto const_start = get_constant_from_source(input_value(0)); + auto const_stop = get_constant_from_source(input_value(1)); + auto const_step = get_constant_from_source(input_value(2)); double start = 0; double stop = 0; @@ -360,9 +361,9 @@ static template static PartialShape infer_output_shape(const op::v0::Range* node, const element::Type& /* et */) { - auto const_start = as_type_ptr(node->input_value(0).get_node_shared_ptr()); - auto const_stop = as_type_ptr(node->input_value(1).get_node_shared_ptr()); - auto const_step = as_type_ptr(node->input_value(2).get_node_shared_ptr()); + auto const_start = get_constant_from_source(node->input_value(0)); + auto const_stop = get_constant_from_source(node->input_value(1)); + auto const_step = get_constant_from_source(node->input_value(2)); T start = static_cast(0); T stop = static_cast(0); diff --git a/ngraph/core/src/op/reduce_prod.cpp b/ngraph/core/src/op/reduce_prod.cpp index 488b155fb46654..d0637458d7e02d 100644 --- a/ngraph/core/src/op/reduce_prod.cpp +++ b/ngraph/core/src/op/reduce_prod.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/reduce_prod.hpp" +#include #include "itt.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/runtime/host_tensor.hpp" @@ -87,3 +88,25 @@ bool op::v1::ReduceProd::evaluate(const HostTensorVector& outputs, return reduce_prod::evaluate_product( inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); } + +bool op::v1::ReduceProd::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + HostTensorPtr lb = input_value(0).get_tensor().get_lower_value(), + ub = input_value(0).get_tensor().get_upper_value(); + if (!lb || !ub || !host_tensor_is_positive(lb) || !host_tensor_is_positive(ub)) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v1::ReduceProd::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + HostTensorPtr lb = input_value(0).get_tensor().get_lower_value(), + ub = input_value(0).get_tensor().get_upper_value(); + if (!lb || !ub || !host_tensor_is_positive(lb) || !host_tensor_is_positive(ub)) + return false; + return default_upper_bound_evaluator(this, output_values); +} diff --git a/ngraph/core/src/op/reshape.cpp b/ngraph/core/src/op/reshape.cpp index 5ed23ff6c275e6..a0a1e3473b6279 100644 --- a/ngraph/core/src/op/reshape.cpp +++ b/ngraph/core/src/op/reshape.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "itt.hpp" #include "ngraph/function.hpp" @@ -105,7 +106,8 @@ namespace reshapeop if (input_pshape.rank().is_static()) for (size_t i = 0; i < input_pshape.rank().get_length(); ++i) { - if (i < reshape_pattern.size() && reshape_pattern[i] == 0) + if (i < reshape_pattern.size() && reshape_pattern[i].get_min_length() == 0 && + reshape_pattern[i].get_max_length() == 0) continue; input_product *= input_pshape[i]; } @@ -240,20 +242,26 @@ void op::v1::Reshape::validate_and_infer_types() std::vector reshape_pattern; int64_t minus_one_idx = -1; - if (const auto constant = as_type_ptr(get_input_node_shared_ptr(1))) + HostTensorPtr lb, ub; + std::tie(lb, ub) = 
evaluate_both_bounds(get_input_source_output(1)); + if (lb && ub) { - const auto pattern_vector = constant->cast_vector(); - for (size_t i = 0; i < pattern_vector.size(); ++i) + const auto lower_bound = std::make_shared(lb)->cast_vector(); + const auto upper_bound = std::make_shared(ub)->cast_vector(); + NGRAPH_CHECK(lower_bound.size() == upper_bound.size()); + for (size_t i = 0; i < lower_bound.size(); ++i) { - NODE_VALIDATION_CHECK(this, pattern_vector[i] >= -1, "Dim size cannot be less than -1"); + NODE_VALIDATION_CHECK(this, + lower_bound[i] >= -1 && upper_bound[i] >= -1, + "Dim size cannot be less than -1"); - if (pattern_vector[i] == -1) + if (lower_bound[i] == -1 && upper_bound[i] == -1) { // ctor of Dimension(-1) would turn input Dimension(0, max_int) NODE_VALIDATION_CHECK( this, minus_one_idx == -1, "More than one dimension has size of -1"); minus_one_idx = static_cast(i); } - reshape_pattern.emplace_back(pattern_vector[i]); + reshape_pattern.emplace_back(lower_bound[i], upper_bound[i]); } } @@ -332,6 +340,20 @@ bool op::v1::Reshape::evaluate(const HostTensorVector& outputs, return evaluate_reshape(outputs, inputs); } +bool op::v1::Reshape::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v1::Reshape::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} + bool op::v1::Reshape::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { if (get_output_partial_shape(0).is_dynamic()) diff --git a/ngraph/core/src/op/reverse.cpp b/ngraph/core/src/op/reverse.cpp index 20b872fb9f19a5..505d88919b833c 100644 --- a/ngraph/core/src/op/reverse.cpp +++ b/ngraph/core/src/op/reverse.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "itt.hpp" @@ -99,12 +100,9 @@ void op::v1::Reverse::validate_and_infer_types() if (input_rank.is_static()) { const auto rank = input_rank.get_length(); - const auto rev_axes_node = input_value(1).get_node_shared_ptr(); - if (op::is_constant(rev_axes_node)) + if (const auto& rev_axes_constant = get_constant_from_source(input_value(1))) { - const auto rev_axes_constant = as_type_ptr(rev_axes_node); - if (m_mode == Mode::INDEX) { const AxisSet rev_axes = rev_axes_constant->get_axis_set_val(); diff --git a/ngraph/core/src/op/scatter_elements_update.cpp b/ngraph/core/src/op/scatter_elements_update.cpp index 8748e548729966..32249121f262ba 100644 --- a/ngraph/core/src/op/scatter_elements_update.cpp +++ b/ngraph/core/src/op/scatter_elements_update.cpp @@ -95,9 +95,16 @@ void op::v3::ScatterElementsUpdate::validate_and_infer_types() " and: ", updates_shape); - if (ngraph::op::is_constant(input_value(3).get_node()) && data_shape.rank().is_static()) + set_output_size(1); + set_output_type(0, data_et, data_shape); + + if (data_shape.is_dynamic()) + set_input_is_relevant_to_shape(0); + if (data_shape.rank().is_dynamic()) + return; + + if (const auto& axis_input = get_constant_from_source(input_value(3))) { - const auto axis_input = as_type_ptr(input_value(3).get_node_shared_ptr()); auto axis = axis_input->cast_vector().at(0); int64_t data_rank_length = data_shape.rank().get_length(); @@ -114,14 +121,6 @@ void op::v3::ScatterElementsUpdate::validate_and_infer_types() "]. 
Got axis value: ", axis); } - - if (data_shape.is_dynamic()) - { - set_input_is_relevant_to_shape(0); - } - - set_output_size(1); - set_output_type(0, data_et, data_shape); } shared_ptr diff --git a/ngraph/core/src/op/select.cpp b/ngraph/core/src/op/select.cpp index a78cb634e81e35..f4d035e3776540 100644 --- a/ngraph/core/src/op/select.cpp +++ b/ngraph/core/src/op/select.cpp @@ -146,6 +146,7 @@ namespace detail NGRAPH_TYPE_CASE(evaluate_select, u32, output_values, input_values, autob); NGRAPH_TYPE_CASE(evaluate_select, u64, output_values, input_values, autob); NGRAPH_TYPE_CASE(evaluate_select, bf16, output_values, input_values, autob); + NGRAPH_TYPE_CASE(evaluate_select, f16, output_values, input_values, autob); NGRAPH_TYPE_CASE(evaluate_select, f32, output_values, input_values, autob); NGRAPH_TYPE_CASE(evaluate_select, f64, output_values, input_values, autob); NGRAPH_TYPE_CASE(evaluate_select, boolean, output_values, input_values, autob); @@ -161,5 +162,6 @@ bool op::v1::Select::evaluate(const HostTensorVector& output_values, { NGRAPH_OP_SCOPE(v1_Select_evaluate); const auto autob = get_auto_broadcast(); - return detail::evaluate_select(output_values, input_values, autob, get_output_element_type(0)); + return detail::evaluate_select( + output_values, input_values, autob, output_values[0]->get_element_type()); } diff --git a/ngraph/core/src/op/shape_of.cpp b/ngraph/core/src/op/shape_of.cpp index 43b22b8d38ca93..e5a6f652b87b3b 100644 --- a/ngraph/core/src/op/shape_of.cpp +++ b/ngraph/core/src/op/shape_of.cpp @@ -21,8 +21,8 @@ #include "ngraph/op/concat.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/op/gather.hpp" +#include "ngraph/op/select.hpp" #include "ngraph/op/shape_of.hpp" -#include "ngraph/pass/constant_folding.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/shape_of.hpp" #include "ngraph/type/element_type_traits.hpp" @@ -46,7 +46,8 @@ void op::v3::ShapeOf::validate_and_infer_types() m_output_type == element::i64 || m_output_type == element::i32, "Output type must be i32 or i64"); set_input_is_relevant_to_value(0, false); - set_output_type(0, m_output_type, PartialShape{get_input_partial_shape(0).rank()}); + const auto input_partial_shape = get_input_partial_shape(0); + set_output_type(0, m_output_type, PartialShape{input_partial_shape.rank()}); } bool ngraph::op::v3::ShapeOf::visit_attributes(AttributeVisitor& visitor) @@ -152,6 +153,73 @@ namespace shape_of } return false; } + + bool evaluate_bound_shape(const Node* shape_of_node, + const HostTensorVector& output_values, + bool is_upper) + { + const auto& input_partial_shape = shape_of_node->get_input_partial_shape(0); + if (input_partial_shape.rank().is_dynamic()) + return false; + const auto rank = input_partial_shape.rank().get_length(); + auto pshape_low = PartialShape::dynamic(rank), pshape_up = PartialShape::dynamic(rank); + for (Dimension::value_type i = 0; i < rank; ++i) + { + Interval interval = input_partial_shape[i].get_interval(); + pshape_low[i] = interval.get_min_val(); + pshape_up[i] = Dimension(interval.get_max_val()).is_dynamic() + ? 
Dimension(interval.get_max_val() - 1) + : interval.get_max_val(); + } + NGRAPH_CHECK(pshape_up.is_static() && pshape_low.is_static()); + const auto input_et = shape_of_node->get_input_element_type(0); + const auto output_et = shape_of_node->get_output_element_type(0); + if (pshape_low.to_shape() == pshape_up.to_shape()) + { + shape_of_node->evaluate(output_values, + {std::make_shared(input_et, pshape_low)}); + shape_of_node->get_output_tensor(0).set_lower_value(output_values[0]); + shape_of_node->get_output_tensor(0).set_upper_value(output_values[0]); + } + else + { + HostTensorVector upper = + is_upper ? output_values + : HostTensorVector{std::make_shared( + output_et, PartialShape{pshape_up.rank().get_length()})}; + shape_of_node->evaluate(upper, {std::make_shared(input_et, pshape_up)}); + shape_of_node->get_output_tensor(0).set_upper_value(upper[0]); + + HostTensorVector lower = + !is_upper ? output_values + : HostTensorVector{std::make_shared( + output_et, PartialShape{pshape_low.rank().get_length()})}; + shape_of_node->evaluate(lower, {std::make_shared(input_et, pshape_low)}); + shape_of_node->get_output_tensor(0).set_lower_value(lower[0]); + + vector dynamic_mask; // true if dimension is dynamic + for (const auto& i : input_partial_shape) + dynamic_mask.push_back(Dimension(i.get_interval().get_max_val()).is_dynamic()); + auto mask_const = + ngraph::op::Constant::create(element::boolean, {dynamic_mask.size()}, dynamic_mask); + auto dynamic_min_const = ngraph::op::Constant::create(output_et, {}, {0}); + auto dynamic_max_const = ngraph::op::Constant::create( + output_et, + {}, + {output_et == element::i64 ? std::numeric_limits::max() + : std::numeric_limits::max()}); + + op::v1::Select().evaluate(lower, + {std::make_shared(mask_const), + std::make_shared(dynamic_min_const), + lower[0]}); + op::v1::Select().evaluate(upper, + {std::make_shared(mask_const), + std::make_shared(dynamic_max_const), + upper[0]}); + } + return true; + } } bool op::v3::ShapeOf::evaluate(const HostTensorVector& output_values, @@ -161,6 +229,16 @@ bool op::v3::ShapeOf::evaluate(const HostTensorVector& output_values, return shape_of::evaluate_shape_of(output_values[0], input_values[0]); } +bool op::v3::ShapeOf::evaluate_lower(const HostTensorVector& output_values) const +{ + return shape_of::evaluate_bound_shape(this, output_values, false); +} + +bool op::v3::ShapeOf::evaluate_upper(const HostTensorVector& output_values) const +{ + return shape_of::evaluate_bound_shape(this, output_values, true); +} + bool op::v3::ShapeOf::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_ITT_SCOPED_TASK(itt::domains::nGraph, "op::v3::ShapeOf::constant_fold"); @@ -220,3 +298,13 @@ bool op::v0::ShapeOf::constant_fold(OutputVector& output_values, const OutputVec return false; return shape_of::constant_fold_shape_of(this, output_values[0], input_values[0], m_is_foldable); } + +bool op::v0::ShapeOf::evaluate_lower(const HostTensorVector& output_values) const +{ + return shape_of::evaluate_bound_shape(this, output_values, false); +} + +bool op::v0::ShapeOf::evaluate_upper(const HostTensorVector& output_values) const +{ + return shape_of::evaluate_bound_shape(this, output_values, true); +} \ No newline at end of file diff --git a/ngraph/core/src/op/space_to_batch.cpp b/ngraph/core/src/op/space_to_batch.cpp index 2d61fe3c3a5184..526e1bc5d5a3f9 100644 --- a/ngraph/core/src/op/space_to_batch.cpp +++ b/ngraph/core/src/op/space_to_batch.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include 
#include "itt.hpp" @@ -74,9 +75,11 @@ void op::v1::SpaceToBatch::validate_and_infer_types() auto pads_begin = input_value(2); auto pads_end = input_value(3); - if (ngraph::op::is_constant(block.get_node_shared_ptr()) && - ngraph::op::is_constant(pads_begin.get_node_shared_ptr()) && - ngraph::op::is_constant(pads_end.get_node_shared_ptr()) && data_pshape.is_static()) + const auto& block_const = get_constant_from_source(block); + const auto& pads_begin_const = get_constant_from_source(pads_begin); + const auto& pads_end_const = get_constant_from_source(pads_end); + + if (block_const && pads_begin_const && pads_end_const && data_pshape.is_static()) { const auto& data_shape = data.get_shape(); @@ -87,13 +90,9 @@ void op::v1::SpaceToBatch::validate_and_infer_types() data_shape.size(), ")"); - auto block_val = std::dynamic_pointer_cast(block.get_node_shared_ptr()) - ->cast_vector(); - auto pads_begin_val = - std::dynamic_pointer_cast(pads_begin.get_node_shared_ptr()) - ->cast_vector(); - auto pads_end_val = std::dynamic_pointer_cast(pads_end.get_node_shared_ptr()) - ->cast_vector(); + auto block_val = block_const->cast_vector(); + auto pads_begin_val = pads_begin_const->cast_vector(); + auto pads_end_val = pads_end_const->cast_vector(); int64_t block_prod = 1; for (long idx : block_val) diff --git a/ngraph/core/src/op/split.cpp b/ngraph/core/src/op/split.cpp index 795e4b7a696788..d7e8c02d381a22 100644 --- a/ngraph/core/src/op/split.cpp +++ b/ngraph/core/src/op/split.cpp @@ -63,9 +63,9 @@ void op::v1::Split::validate_and_infer_types() this, axis_et.is_integral(), "The 'axis' input only accepts integral types"); PartialShape each_output_shape{data_ps}; - if (op::is_constant(input_value(1).get_node()) && data_ps.rank().is_static()) + const auto axis_input = get_constant_from_source(input_value(1)); + if (axis_input && data_ps.rank().is_static()) { - const auto axis_input = as_type_ptr(input_value(1).get_node_shared_ptr()); auto axis = axis_input->cast_vector()[0]; const auto data_rank = get_input_partial_shape(0).rank(); diff --git a/ngraph/core/src/op/squeeze.cpp b/ngraph/core/src/op/squeeze.cpp index 168843de923b6d..649a927fdb2059 100644 --- a/ngraph/core/src/op/squeeze.cpp +++ b/ngraph/core/src/op/squeeze.cpp @@ -52,7 +52,7 @@ void op::Squeeze::pre_validate_and_infer_types() bool data_has_dynamic_rank = data.get_partial_shape().rank().is_dynamic(); bool data_has_dynamic_shape = data.get_partial_shape().is_dynamic(); - auto axes_constant = as_type_ptr(axes_node); + auto axes_constant = get_constant_from_source(axes_node); bool axes_is_empty_constant = (axes_constant) ? 
axes_constant->cast_vector().empty() : false; @@ -184,6 +184,20 @@ bool op::v0::Squeeze::evaluate(const HostTensorVector& outputs, return squeeze::evaluate_squeeze(inputs[0], inputs[1], outputs[0]); } +bool op::v0::Squeeze::evaluate_lower(const HostTensorVector& output_values) const +{ + if (inputs().size() > 1 && !input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v0::Squeeze::evaluate_upper(const HostTensorVector& output_values) const +{ + if (inputs().size() > 1 && !input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} + bool op::v0::Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { if (get_output_partial_shape(0).is_dynamic()) diff --git a/ngraph/core/src/op/strided_slice.cpp b/ngraph/core/src/op/strided_slice.cpp index b3f93de59758af..c08855d0672dae 100644 --- a/ngraph/core/src/op/strided_slice.cpp +++ b/ngraph/core/src/op/strided_slice.cpp @@ -186,9 +186,9 @@ void op::v1::StridedSlice::validate_and_infer_types() set_input_is_relevant_to_shape(2); set_input_is_relevant_to_shape(3); - auto begin_const = as_type_ptr(input_value(1).get_node_shared_ptr()); - auto end_const = as_type_ptr(input_value(2).get_node_shared_ptr()); - auto strides = as_type_ptr(input_value(3).get_node_shared_ptr()); + auto begin_const = get_constant_from_source(input_value(1)); + auto end_const = get_constant_from_source(input_value(2)); + auto strides = get_constant_from_source(input_value(3)); if (begin_const && end_const && strides) { @@ -296,3 +296,21 @@ bool op::v1::StridedSlice::evaluate(const HostTensorVector& output_values, convert_mask_to_axis_set(get_ellipsis_mask()), output_values[0]); } + +bool op::v1::StridedSlice::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound() || + !input_value(2).get_tensor().has_and_set_bound() || + !input_value(3).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v1::StridedSlice::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound() || + !input_value(2).get_tensor().has_and_set_bound() || + !input_value(3).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} \ No newline at end of file diff --git a/ngraph/core/src/op/tile.cpp b/ngraph/core/src/op/tile.cpp index c9da2a80dfbf98..2b2945a7081bd5 100644 --- a/ngraph/core/src/op/tile.cpp +++ b/ngraph/core/src/op/tile.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/tile.hpp" +#include #include "itt.hpp" #include "ngraph/op/constant.hpp" @@ -51,41 +52,31 @@ void op::v0::Tile::validate_and_infer_types() auto arg_shape = get_input_partial_shape(0); auto repeats_shape = get_input_partial_shape(1); - auto repeats_rank = repeats_shape.rank(); - - NODE_VALIDATION_CHECK(this, repeats_rank.compatible(1), "Shape of repeats must be of rank 1"); - - auto out_shape = PartialShape::dynamic(); - - if (auto const_repeats = as_type_ptr(input_value(1).get_node_shared_ptr())) + NODE_VALIDATION_CHECK( + this, repeats_shape.rank().compatible(1), "Shape of repeats must be of rank 1"); + PartialShape repeats_as_pshape; + bool repeats_are_known = + evaluate_as_partial_shape(get_input_source_output(1), 
repeats_as_pshape); + std::vector repeats_value(repeats_as_pshape); + if (repeats_are_known && !repeats_value.empty() && arg_shape.rank().is_static()) { - if (arg_shape.is_static()) - { - auto data_shape = arg_shape.to_shape(); - auto data_rank = data_shape.size(); - auto repeats_val = const_repeats->cast_vector(); - auto repeats_rank = repeats_val.size(); - auto output_rank = std::max(data_rank, repeats_rank); - - // expand data shape and repeats to output rank - data_shape.insert(data_shape.begin(), output_rank - data_rank, 1); - repeats_val.insert(repeats_val.begin(), output_rank - repeats_rank, 1); - - Shape output_shape(output_rank); - for (size_t i = 0; i < output_rank; i++) - { - output_shape[i] = data_shape[i] * repeats_val[i]; - } - set_output_type(0, arg_et, output_shape); - } - else - { - set_output_type(0, arg_et, out_shape); - } + std::vector data_shape(arg_shape); + auto data_rank = data_shape.size(); + auto repeats_rank = repeats_value.size(); + auto output_rank = std::max(data_rank, repeats_rank); + + // expand data shape and repeats to output rank + data_shape.insert(data_shape.begin(), output_rank - data_rank, 1); + repeats_value.insert(repeats_value.begin(), output_rank - repeats_rank, 1); + + auto output_shape = PartialShape::dynamic(output_rank); + for (size_t i = 0; i < output_rank; i++) + output_shape[i] = data_shape[i] * repeats_value[i]; + set_output_type(0, arg_et, output_shape); } else { - set_output_type(0, arg_et, out_shape); + set_output_type(0, arg_et, PartialShape::dynamic()); } set_input_is_relevant_to_shape(0); diff --git a/ngraph/core/src/op/topk.cpp b/ngraph/core/src/op/topk.cpp index a33c413f73d964..f4cf1334e66c23 100644 --- a/ngraph/core/src/op/topk.cpp +++ b/ngraph/core/src/op/topk.cpp @@ -295,28 +295,32 @@ void op::v1::TopK::validate_and_infer_types() if (output_shape.rank().is_static()) { m_normalized_axis = ngraph::normalize_axis(this, m_axis, output_shape.rank()); - if (k != 0) - { - output_shape[m_normalized_axis] = k; - } - else + + PartialShape k_as_shape; + if (evaluate_as_partial_shape(input_value(1), k_as_shape)) { - auto max_k = maximum_value(input_value(1)); - if (max_k.first) + if (k_as_shape.is_static()) { - const auto in_min = output_shape[m_normalized_axis].get_min_length(); - const auto in_max = output_shape[m_normalized_axis].get_max_length(); - const auto lower = std::min(in_min, max_k.second); - const auto upper = in_max < 0 - ? Dimension::dynamic().get_max_length() - : std::max(in_max, max_k.second); - output_shape[m_normalized_axis] = Dimension(lower, upper); + output_shape[m_normalized_axis] = k_as_shape[0]; } else { - output_shape[m_normalized_axis] = -1; + const auto in_min = output_shape[m_normalized_axis].get_min_length(); + const auto in_max = output_shape[m_normalized_axis].get_max_length(); + + const auto k_min = k_as_shape[0].get_min_length(); + const auto k_max = k_as_shape[0].get_max_length(); + + const auto lower = std::min(in_min, k_min); + const auto upper = in_max < 0 ? Dimension::dynamic().get_max_length() + : std::max(in_max, k_max); + output_shape[m_normalized_axis] = Dimension(lower, upper); } } + else + { + output_shape[m_normalized_axis] = -1; + } } set_output_size(2); diff --git a/ngraph/core/src/op/transpose.cpp b/ngraph/core/src/op/transpose.cpp index 0306d13c2ab7fa..4de38977219caf 100644 --- a/ngraph/core/src/op/transpose.cpp +++ b/ngraph/core/src/op/transpose.cpp @@ -14,10 +14,9 @@ // limitations under the License. 
//***************************************************************************** -#include +#include #include "itt.hpp" -#include "ngraph/op/constant.hpp" #include "ngraph/op/transpose.hpp" #include "ngraph/runtime/opt_kernel/reshape.hpp" @@ -59,7 +58,7 @@ void op::v1::Transpose::validate_and_infer_types() set_input_is_relevant_to_shape(1); - if (auto input_const = as_type_ptr(input_value(1).get_node_shared_ptr())) + if (const auto& input_const = get_constant_from_source(input_value(1))) { auto permutation = input_const->get_axis_vector_val(); if (permutation.empty()) diff --git a/ngraph/core/src/op/unsqueeze.cpp b/ngraph/core/src/op/unsqueeze.cpp index e174d601ded1b7..52df36927081ab 100644 --- a/ngraph/core/src/op/unsqueeze.cpp +++ b/ngraph/core/src/op/unsqueeze.cpp @@ -44,9 +44,8 @@ void op::v0::Unsqueeze::validate_and_infer_types() auto data_partial_shape = data.get_partial_shape(); const auto data_rank = data_partial_shape.rank(); - const auto axes_node = input_value(1).get_node_shared_ptr(); - - if (data_rank.is_dynamic() || !op::is_constant(axes_node)) + const auto axes_constant = get_constant_from_source(input_value(1)); + if (data_rank.is_dynamic() || !axes_constant) { set_output_type(0, get_input_element_type(0), PartialShape::dynamic()); return; @@ -55,7 +54,6 @@ void op::v0::Unsqueeze::validate_and_infer_types() uint64_t data_rank_value = data_partial_shape.rank().get_length(); // Get value of axes from Constant - const auto axes_constant = as_type_ptr(axes_node); const auto axes_values = axes_constant->cast_vector(); const auto expanded_rank = data_rank_value + axes_values.size(); auto axes = normalize_axes(this->description(), axes_values, expanded_rank); @@ -157,6 +155,20 @@ bool op::v0::Unsqueeze::evaluate(const HostTensorVector& outputs, return unsqueeze::evaluate_unsqueeze(inputs[0], inputs[1], outputs[0]); } +bool op::v0::Unsqueeze::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::v0::Unsqueeze::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound()) + return false; + return default_upper_bound_evaluator(this, output_values); +} + bool op::v0::Unsqueeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { diff --git a/ngraph/core/src/op/util/arithmetic_reduction.cpp b/ngraph/core/src/op/util/arithmetic_reduction.cpp index 69ddc77ff694d7..5793327b306e8b 100644 --- a/ngraph/core/src/op/util/arithmetic_reduction.cpp +++ b/ngraph/core/src/op/util/arithmetic_reduction.cpp @@ -50,7 +50,7 @@ bool op::util::ArithmeticReduction::reduction_axes_constant() const const AxisSet op::util::ArithmeticReduction::get_reduction_axes() const { AxisSet axes; - if (auto const_op = as_type(input_value(1).get_node())) + if (const auto& const_op = get_constant_from_source(input_value(1))) { const auto const_data = const_op->cast_vector(); const auto input_data_rank = get_input_partial_shape(0).rank(); @@ -76,11 +76,11 @@ void op::util::ArithmeticReduction::validate_and_infer_types() PartialShape result_shape{PartialShape::dynamic()}; - if (input_rank.is_static() && reduction_axes_constant()) + auto axes = get_constant_from_source(input_value(1)); + if (input_rank.is_static() && axes) { AxisSet reduction_axes; - const auto reduction_axes_val = - as_type(input_value(1).get_node())->cast_vector(); + const auto reduction_axes_val = axes->cast_vector(); 
for (auto axis : reduction_axes_val) { try diff --git a/ngraph/core/src/op/util/arithmetic_reductions_keep_dims.cpp b/ngraph/core/src/op/util/arithmetic_reductions_keep_dims.cpp index b6930a05eecbbc..4b2767fa129bce 100644 --- a/ngraph/core/src/op/util/arithmetic_reductions_keep_dims.cpp +++ b/ngraph/core/src/op/util/arithmetic_reductions_keep_dims.cpp @@ -51,11 +51,11 @@ void op::util::ArithmeticReductionKeepDims::validate_and_infer_types() if (input_rank.is_static()) result_shape = PartialShape::dynamic(input_rank); - if (input_rank.is_static() && reduction_axes_constant()) + const auto& axes = get_constant_from_source(input_value(1)); + if (input_rank.is_static() && axes) { AxisSet reduction_axes; - auto reduction_axes_val = - as_type(input_value(1).get_node())->cast_vector(); + auto reduction_axes_val = axes->cast_vector(); for (auto axis : reduction_axes_val) { try diff --git a/ngraph/core/src/op/util/attr_types.cpp b/ngraph/core/src/op/util/attr_types.cpp index 60af609c673269..3d109afeaf909e 100644 --- a/ngraph/core/src/op/util/attr_types.cpp +++ b/ngraph/core/src/op/util/attr_types.cpp @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** +#include #include #include "ngraph/attribute_visitor.hpp" diff --git a/ngraph/core/src/op/util/binary_elementwise_arithmetic.cpp b/ngraph/core/src/op/util/binary_elementwise_arithmetic.cpp index 8f5c2735015214..6c755892c52fcb 100644 --- a/ngraph/core/src/op/util/binary_elementwise_arithmetic.cpp +++ b/ngraph/core/src/op/util/binary_elementwise_arithmetic.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/util/binary_elementwise_arithmetic.hpp" +#include #include "itt.hpp" #include "ngraph/attribute_visitor.hpp" #include "ngraph/op/util/elementwise_args.hpp" @@ -65,3 +66,27 @@ bool op::util::BinaryElementwiseArithmetic::visit_attributes(AttributeVisitor& v visitor.on_attribute("auto_broadcast", m_autob); return true; } + +bool op::util::BinaryElementwiseArithmetic::evaluate_upper( + const HostTensorVector& output_values) const +{ + HostTensorVector lower_output_tensors; + for (const auto& output : output_values) + lower_output_tensors.push_back( + std::make_shared(output->get_element_type(), output->get_partial_shape())); + if (!interval_bound_evaluator(this, lower_output_tensors, output_values)) + return false; + return true; +} + +bool op::util::BinaryElementwiseArithmetic::evaluate_lower( + const HostTensorVector& output_values) const +{ + HostTensorVector upper_output_tensors; + for (const auto& output : output_values) + upper_output_tensors.push_back( + std::make_shared(output->get_element_type(), output->get_partial_shape())); + if (!interval_bound_evaluator(this, output_values, upper_output_tensors)) + return false; + return true; +} \ No newline at end of file diff --git a/ngraph/core/src/op/util/broadcast_base.cpp b/ngraph/core/src/op/util/broadcast_base.cpp index c0ed8f556cc996..1b8ab41d10dee5 100644 --- a/ngraph/core/src/op/util/broadcast_base.cpp +++ b/ngraph/core/src/op/util/broadcast_base.cpp @@ -24,6 +24,7 @@ #include "ngraph/runtime/reference/broadcast.hpp" +#include #include using namespace std; @@ -48,9 +49,12 @@ op::util::BroadcastBase::BroadcastBase(const Output& arg, PartialShape op::util::BroadcastBase::get_result_shape_pdpd( const PartialShape& arg0_shape, - const Shape& target_shape, + const PartialShape& 
target_pshape, const op::BroadcastModeSpec& broadcast_spec) const { + if (target_pshape.is_dynamic()) + return PartialShape::dynamic(target_pshape.rank()); + Shape target_shape = target_pshape.to_shape(); if (arg0_shape.rank().is_dynamic()) { return PartialShape::dynamic(target_shape.size()); @@ -85,44 +89,47 @@ PartialShape op::util::BroadcastBase::get_result_shape_pdpd( } void op::util::BroadcastBase::validate_target_shape_numpy(const PartialShape& arg_shape, - const Shape& target_shape) const + const PartialShape& target_shape) const { - if (arg_shape.rank().is_dynamic()) + if (arg_shape.rank().is_dynamic() || target_shape.rank().is_dynamic()) { return; } const auto arg_rank_length = arg_shape.rank().get_length(); - const int64_t start_axis = target_shape.size() - arg_rank_length; + const auto target_rank_length = target_shape.rank().get_length(); + const int64_t start_axis = target_rank_length - arg_rank_length; NODE_VALIDATION_CHECK(this, start_axis >= 0, "Broadcast target_shape has smaller rank ", - target_shape.size(), + target_rank_length, " than arg shape ", arg_rank_length); - for (auto i = start_axis; i < target_shape.size(); i++) + for (auto i = start_axis; i < target_rank_length; i++) { - if (arg_shape[i - start_axis].is_dynamic()) - { - continue; - } - const size_t arg_dim = arg_shape[i - start_axis].get_length(); + stringstream ss; + ss << " or " << target_shape[i]; NODE_VALIDATION_CHECK(this, - arg_dim == 1 || arg_dim == target_shape[i], + arg_shape[i - start_axis].is_dynamic() || + target_shape[i].is_dynamic() || arg_shape[i - start_axis] == 1 || + arg_shape[i - start_axis] == target_shape[i], "Input shape dimension equal ", - arg_dim, + arg_shape[i - start_axis], " cannot be broadcasted (numpy mode) to ", target_shape[i], ". Allowed input dimension value would be 1", - target_shape[i] != 1 - ? (std::string(" or ") + std::to_string(target_shape[i])).c_str() - : ""); + target_shape[i] != 1 ? 
ss.str() : ""); } } -void op::util::BroadcastBase::validate_target_shape_none(const Shape& arg_shape, +void op::util::BroadcastBase::validate_target_shape_none(const PartialShape& arg_shape, const AxisVector& axes_mapping_val, - const Shape& target_shape) const + const PartialShape& target_shape) const { + if (arg_shape.rank().is_dynamic() || target_shape.rank().is_dynamic()) + { + return; + } + const auto target_rank_length = target_shape.rank().get_length(); // axes_mapping needs to be in sorted order NODE_VALIDATION_CHECK(this, std::is_sorted(axes_mapping_val.begin(), axes_mapping_val.end()), @@ -130,7 +137,7 @@ void op::util::BroadcastBase::validate_target_shape_none(const Shape& arg_shape, axes_mapping_val, " not in sorted order"); - if (arg_shape.size() == 0 && axes_mapping_val.size() > 0) + if (arg_shape.rank().get_length() == 0 && axes_mapping_val.size() > 0) { NODE_VALIDATION_CHECK(this, target_shape[axes_mapping_val[0]] == 1, @@ -141,18 +148,18 @@ void op::util::BroadcastBase::validate_target_shape_none(const Shape& arg_shape, for (size_t i = 0; i < axes_mapping_val.size(); i++) { NODE_VALIDATION_CHECK(this, - axes_mapping_val[i] < target_shape.size(), + axes_mapping_val[i] < target_rank_length, "Broadcast axes_mapping[", i, "]: ", axes_mapping_val[i], " exceeds target rank ", - target_shape.size()); + target_rank_length); - if (arg_shape.size() > 0) + if (arg_shape.rank().get_length() > 0) { NODE_VALIDATION_CHECK(this, - target_shape[axes_mapping_val[i]] == arg_shape[i], + target_shape[axes_mapping_val[i]].same_scheme(arg_shape[i]), "Broadcast target[axes_mapping[", i, "]]", @@ -219,13 +226,15 @@ void op::util::BroadcastBase::validate_and_infer_types() } } - const auto shape_constant = as_type_ptr(input_value(1).get_node_shared_ptr()); + PartialShape output_shape; + bool output_shape_defined = evaluate_as_partial_shape(get_input_source_output(1), output_shape); if (auto concat = as_type_ptr(input_value(1).get_node_shared_ptr())) { auto concat_inputs = concat->inputs(); - if (concat->get_output_partial_shape(0).is_static() && concat->get_shape().size() == 1 && + if (!output_shape_defined && concat->get_output_partial_shape(0).is_static() && + concat->get_shape().size() == 1 && concat_inputs.size() == shape_size(concat->get_shape())) { auto output_partial_shape = vector{}; @@ -241,15 +250,16 @@ void op::util::BroadcastBase::validate_and_infer_types() output_partial_shape.push_back(Dimension::dynamic()); } } - result_shape = PartialShape(output_partial_shape); + output_shape_defined = true; + output_shape = PartialShape(output_partial_shape); } } if (m_mode.m_type == BroadcastType::NONE) { - if (shape_constant) + if (output_shape_defined) { - result_shape = shape_constant->get_shape_val(); + result_shape = output_shape; } // Validate axes_mapping if (get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static() && @@ -268,31 +278,27 @@ void op::util::BroadcastBase::validate_and_infer_types() " doesn't match rank of input tensor ", input_rank); - if (shape_constant && op::is_constant(input_value(2).get_node())) + if (output_shape_defined && has_and_set_equal_bounds(input_value(2))) { - auto target_shape = shape_constant->get_shape_val(); auto axes_mapping_val = - as_type_ptr(input_value(2).get_node_shared_ptr()) - ->get_axis_vector_val(); - validate_target_shape_none(arg_shape, axes_mapping_val, target_shape); + get_constant_from_source(input_value(2))->get_axis_vector_val(); + validate_target_shape_none(arg_shape, axes_mapping_val, output_shape); } } } else if 
(m_mode.m_type == BroadcastType::NUMPY) { - if (shape_constant) + if (output_shape_defined) { - const auto target_shape = shape_constant->get_shape_val(); - result_shape = target_shape; - validate_target_shape_numpy(input_shape, target_shape); + result_shape = output_shape; + validate_target_shape_numpy(input_shape, output_shape); } } else if (m_mode.m_type == BroadcastType::PDPD) { - if (shape_constant) + if (output_shape_defined) { - const auto target_shape = shape_constant->get_shape_val(); - result_shape = get_result_shape_pdpd(input_shape, target_shape, m_mode); + result_shape = get_result_shape_pdpd(input_shape, output_shape, m_mode); } } set_output_type(0, get_input_element_type(0), result_shape); @@ -344,8 +350,7 @@ std::pair op::util::BroadcastBase::get_broadcast_axes() const if (m_mode.m_type == BroadcastType::NONE) { - const auto axes_mapping_constant = - as_type_ptr(input_value(2).get_node_shared_ptr()); + const auto axes_mapping_constant = get_constant_from_source(input_value(2)); if (get_input_partial_shape(1).is_static() && axes_mapping_constant) { auto axes_mapping_val = axes_mapping_constant->get_axis_vector_val(); @@ -563,3 +568,19 @@ bool op::util::BroadcastBase::evaluate(const HostTensorVector& outputs, return evaluate_broadcast(inputs[0], outputs[0], pair_broadcast_axes, result_shape.to_shape()); } + +bool op::util::BroadcastBase::evaluate_lower(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound() || + (get_input_size() > 2 && !input_value(2).get_tensor().has_and_set_bound())) + return false; + return default_lower_bound_evaluator(this, output_values); +} + +bool op::util::BroadcastBase::evaluate_upper(const HostTensorVector& output_values) const +{ + if (!input_value(1).get_tensor().has_and_set_bound() || + (get_input_size() > 2 && !input_value(2).get_tensor().has_and_set_bound())) + return false; + return default_upper_bound_evaluator(this, output_values); +} \ No newline at end of file diff --git a/ngraph/core/src/op/util/fused_op.cpp b/ngraph/core/src/op/util/fused_op.cpp index 0184091195c714..af80acdd36eac6 100644 --- a/ngraph/core/src/op/util/fused_op.cpp +++ b/ngraph/core/src/op/util/fused_op.cpp @@ -48,7 +48,6 @@ void op::util::FusedOp::validate_and_infer_types() for (auto& val : input_values()) nodes.emplace_back(val.get_node_shared_ptr()); auto subgraph = extract_subgraph(ngraph::as_node_vector(subgraph_outputs), nodes); - validate_nodes_and_infer_types(subgraph); size_t i = 0; for (const auto& output : subgraph_outputs) diff --git a/ngraph/core/src/op/util/logical_reduction.cpp b/ngraph/core/src/op/util/logical_reduction.cpp index f26d37f62aedc8..53fb0d71fa389c 100644 --- a/ngraph/core/src/op/util/logical_reduction.cpp +++ b/ngraph/core/src/op/util/logical_reduction.cpp @@ -43,13 +43,13 @@ op::util::LogicalReduction::LogicalReduction(const Output& arg, bool op::util::LogicalReduction::reduction_axes_constant() const { - return is_type(input_value(1).get_node()); + return has_and_set_equal_bounds(input_value(1)); } const AxisSet op::util::LogicalReduction::get_reduction_axes() const { AxisSet axes; - if (auto const_op = as_type(input_value(1).get_node())) + if (auto const_op = get_constant_from_source(input_value(1))) { axes = const_op->get_axis_set_val(); } @@ -71,11 +71,21 @@ void op::util::LogicalReduction::validate_and_infer_types() PartialShape result_shape{PartialShape::dynamic()}; - if (input_rank.is_static() && reduction_axes_constant()) + set_input_is_relevant_to_shape(1); + + 
NODE_VALIDATION_CHECK(this, + get_input_element_type(0).compatible(element::boolean), + "Input element type must be boolean."); + + set_output_type(0, element::boolean, result_shape); + + if (input_rank.is_dynamic()) + return; + + if (const auto axes_const = get_constant_from_source(input_value(1))) { AxisSet reduction_axes; - auto reduction_axes_val = - as_type(input_value(1).get_node())->cast_vector(); + auto reduction_axes_val = axes_const->cast_vector(); for (auto axis : reduction_axes_val) { try @@ -110,11 +120,5 @@ void op::util::LogicalReduction::validate_and_infer_types() result_shape = PartialShape(dims); } - set_input_is_relevant_to_shape(1); - - NODE_VALIDATION_CHECK(this, - get_input_element_type(0).compatible(element::boolean), - "Input element type must be boolean."); - set_output_type(0, element::boolean, result_shape); } diff --git a/ngraph/core/src/op/util/logical_reduction_keep_dims.cpp b/ngraph/core/src/op/util/logical_reduction_keep_dims.cpp index b9ce974de16b24..8a80c596b61300 100644 --- a/ngraph/core/src/op/util/logical_reduction_keep_dims.cpp +++ b/ngraph/core/src/op/util/logical_reduction_keep_dims.cpp @@ -46,18 +46,18 @@ void op::util::LogicalReductionKeepDims::validate_and_infer_types() { const auto input_shape = get_input_partial_shape(0); const auto input_rank = input_shape.rank(); - PartialShape result_shape{PartialShape::dynamic()}; + PartialShape result_shape{PartialShape::dynamic(input_rank)}; - if (input_rank.is_static()) - { - result_shape = PartialShape::dynamic(input_rank); - } + set_input_is_relevant_to_shape(1); + set_output_type(0, get_input_element_type(0), result_shape); - if (input_rank.is_static() && reduction_axes_constant()) + if (input_shape.is_dynamic()) + return; + + if (auto axes_const = get_constant_from_source(input_value(1))) { AxisSet reduction_axes; - auto reduction_axes_val = - as_type(input_value(1).get_node())->cast_vector(); + auto reduction_axes_val = axes_const->cast_vector(); for (auto axis : reduction_axes_val) { try @@ -94,7 +94,7 @@ void op::util::LogicalReductionKeepDims::validate_and_infer_types() } result_shape = PartialShape(dims); } - set_input_is_relevant_to_shape(1); + set_output_type(0, get_input_element_type(0), result_shape); } else diff --git a/ngraph/core/src/op/util/scatter_base.cpp b/ngraph/core/src/op/util/scatter_base.cpp index e4f1d6b6c00520..992642737572df 100644 --- a/ngraph/core/src/op/util/scatter_base.cpp +++ b/ngraph/core/src/op/util/scatter_base.cpp @@ -81,14 +81,19 @@ void op::util::ScatterBase::validate_and_infer_types() data_shape.rank().get_length() - 1, "Updates rank is expected to be indices rank + data rank - 1."); - bool is_axis_constant = op::is_constant(input_value(AXIS).get_node()); + if (data_shape.is_dynamic()) + { + set_input_is_relevant_to_shape(0); + } + set_output_type(0, data_et, data_shape); + + if (data_shape.rank().is_dynamic()) + return; // Get axis value if possible. 
- if (is_axis_constant && data_shape.rank().is_static()) + if (const auto& axis_const_input = get_constant_from_source(input_value(AXIS))) { bool compatible = true; - const auto axis_const_input = - as_type_ptr(input_value(AXIS).get_node_shared_ptr()); int64_t axis = axis_const_input->cast_vector().at(0); axis = normalize_axis(this, axis, data_shape.rank().get_length()); @@ -125,12 +130,6 @@ void op::util::ScatterBase::validate_and_infer_types() axis, "."); } - - if (data_shape.is_dynamic()) - { - set_input_is_relevant_to_shape(0); - } - set_output_type(0, data_et, data_shape); } bool op::util::ScatterBase::visit_attributes(AttributeVisitor& visitor) diff --git a/ngraph/core/src/op/util/sub_graph_base.cpp b/ngraph/core/src/op/util/sub_graph_base.cpp index 76112e232bca5b..bcb18d80e529a6 100644 --- a/ngraph/core/src/op/util/sub_graph_base.cpp +++ b/ngraph/core/src/op/util/sub_graph_base.cpp @@ -142,6 +142,7 @@ void op::util::SubGraphOp::set_merged_input(const std::shared_ptr& bo input_for_value(initial_value).get_index(), m_body->get_parameter_index(body_parameter), m_body->get_result_index(successive_value))); + validate_and_infer_types(); } void op::util::SubGraphOp::set_invariant_input(const std::shared_ptr& body_parameter, @@ -149,6 +150,7 @@ void op::util::SubGraphOp::set_invariant_input(const std::shared_ptr& { m_input_descriptions.push_back(std::make_shared( input_for_value(value).get_index(), m_body->get_parameter_index(body_parameter))); + validate_and_infer_types(); } Output op::util::SubGraphOp::get_iter_value(const Output& body_value, int64_t iteration) @@ -157,6 +159,7 @@ Output op::util::SubGraphOp::get_iter_value(const Output& body_value m_output_descriptions.push_back(std::make_shared( m_body->get_result_index(body_value), output_index, iteration)); set_output_size(output_index + 1); + validate_and_infer_types(); return Output(shared_from_this(), output_index); } @@ -171,6 +174,7 @@ Output op::util::SubGraphOp::get_concatenated_slices(const Output& b m_output_descriptions.push_back(std::make_shared( m_body->get_result_index(body_value), output_index, start, stride, part_size, end, axis)); set_output_size(output_index + 1); + validate_and_infer_types(); return Output(shared_from_this(), output_index); } @@ -190,6 +194,7 @@ void op::util::SubGraphOp::set_sliced_input(const std::shared_ptr& pa part_size, end, axis)); + validate_and_infer_types(); } Input op::util::SubGraphOp::input_for_value(const Output& value) diff --git a/ngraph/core/src/op/variadic_split.cpp b/ngraph/core/src/op/variadic_split.cpp index c5ca504c67691e..c54f0ce6d48f02 100644 --- a/ngraph/core/src/op/variadic_split.cpp +++ b/ngraph/core/src/op/variadic_split.cpp @@ -63,23 +63,21 @@ void ngraph::op::v1::VariadicSplit::validate_and_infer_types() auto num_outputs = split_lengths_pshape[0].get_length(); auto data = input_value(0); - auto axis_input = input_value(1).get_node_shared_ptr(); - auto split_lengths_input = input_value(2).get_node_shared_ptr(); + auto axis_source = input_value(1); + auto split_lengths_source = input_value(2); auto data_shape = data.get_partial_shape(); const auto& data_type = data.get_element_type(); set_output_size(num_outputs); - if (data_shape.rank().is_static() && op::is_constant(axis_input) && - op::is_constant(split_lengths_input)) + const auto& axis_input_constant = get_constant_from_source(axis_source); + const auto& split_lengths_constant = get_constant_from_source(split_lengths_source); + if (data_shape.rank().is_static() && axis_input_constant && split_lengths_constant) { - 
const auto axis_input_constant = as_type_ptr(axis_input); auto axis_val = axis_input_constant->cast_vector()[0]; - // Adjust split axis in case of negatives int64_t axis = ngraph::normalize_axis(this, axis_val, data_shape.rank()); - auto split_lengths = - as_type_ptr(split_lengths_input)->cast_vector(); + auto split_lengths = split_lengths_constant->cast_vector(); // Adjust split lengths in case of negatives size_t sum_of_splits = 0; int64_t negative_one = -1; diff --git a/ngraph/core/src/pass/constant_folding.cpp b/ngraph/core/src/pass/constant_folding.cpp index 0c70cde3d011e3..da2cfee15957eb 100644 --- a/ngraph/core/src/pass/constant_folding.cpp +++ b/ngraph/core/src/pass/constant_folding.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/pass/constant_folding.hpp" +#include #include "ngraph/op/util/sub_graph_base.hpp" #include "ngraph/rt_info.hpp" @@ -25,13 +26,13 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConstantFolding, "ConstantFolding", 0); bool ngraph::pass::ConstantFolding::run_on_function(std::shared_ptr f) { - bool rewritten = false; + bool rewritten = pre_calculated_values_folding(f); for (const auto& node : f->get_ordered_ops()) { if (rewritten) { - node->revalidate_and_infer_types(); + node->validate_and_infer_types(); } OutputVector replacements(node->get_output_size()); @@ -90,3 +91,58 @@ void ngraph::pass::ConstantFolding::copy_runtime_info_to_target_inputs( copy_runtime_info({node, consumer}, consumer); } } + +bool ngraph::pass::ConstantFolding::pre_calculated_values_folding( + const std::shared_ptr& f) +{ + deque> nodes; + set> visited; + for (auto& r : f->get_results()) + nodes.push_back(r); + for (auto& r : f->get_sinks()) + nodes.emplace_back(r); + + bool rewritten = false; + while (!nodes.empty()) + { + auto curr_node = nodes.front(); + nodes.pop_front(); + if (visited.count(curr_node) || is_type(curr_node)) + continue; + visited.insert(curr_node); + + for (auto& input_value : curr_node->input_values()) + { + if (input_value.get_tensor().has_and_set_bound()) + { + auto input_node = input_value.get_node_shared_ptr(); + auto replacement = + std::make_shared(input_value.get_tensor().get_lower_value()); + if (replacement && !is_type(input_node)) + { + if (input_node->get_output_size() == 1) + { + replacement->set_friendly_name(input_node->get_friendly_name()); + } + else + { + replacement->set_friendly_name(input_node->get_friendly_name() + "." + + std::to_string(input_value.get_index())); + } + input_value.replace(replacement); + // Propagate runtime info attributes to replacement consumer nodes + copy_runtime_info_to_target_inputs(input_node, replacement); + + rewritten = true; + } + } + else + { + // continue searching + const auto& input_node = input_value.get_node_shared_ptr(); + nodes.push_front(input_node); + } + } + } + return rewritten; +} diff --git a/ngraph/core/src/pass/graph_rewrite.cpp b/ngraph/core/src/pass/graph_rewrite.cpp index d1980024e47060..85a9189241b33f 100644 --- a/ngraph/core/src/pass/graph_rewrite.cpp +++ b/ngraph/core/src/pass/graph_rewrite.cpp @@ -109,12 +109,14 @@ bool pass::GraphRewrite::run_on_function(shared_ptr f) // it's type // and use it in unordered_map as key for fast MatcherPass search. Otherwise type is unknown // and default algorithm is used. 
- NodeTypeInfo root_type_info = root->get_type_info(); if (auto p = dynamic_pointer_cast(root)) { if (auto any_type = dynamic_pointer_cast(p)) { - root_type_info = any_type->get_wrapped_type(); + for (const auto& root_type_info : any_type->get_wrapped_types()) + { + type_to_matcher[root_type_info].push_back(matcher_index); + } } else { @@ -122,7 +124,10 @@ bool pass::GraphRewrite::run_on_function(shared_ptr f) break; } } - type_to_matcher[root_type_info].push_back(matcher_index); + else + { + type_to_matcher[root->get_type_info()].push_back(matcher_index); + } // TODO: traverse parents for root_type_info in order to register complete list of matchers // including ones triggered by parent type info. diff --git a/ngraph/core/src/pass/visualize_tree.cpp b/ngraph/core/src/pass/visualize_tree.cpp index 5b1de3e022fa6c..c9cbd8825db2de 100644 --- a/ngraph/core/src/pass/visualize_tree.cpp +++ b/ngraph/core/src/pass/visualize_tree.cpp @@ -314,7 +314,7 @@ static std::string pretty_partial_shape(const PartialShape& shape) } if (shape[i].is_dynamic()) { - ss << "?"; + ss << shape[i]; } else { diff --git a/ngraph/core/src/pattern/op/wrap_type.cpp b/ngraph/core/src/pattern/op/wrap_type.cpp index 74ca1b61bdc5e1..b76403c032950f 100644 --- a/ngraph/core/src/pattern/op/wrap_type.cpp +++ b/ngraph/core/src/pattern/op/wrap_type.cpp @@ -31,7 +31,12 @@ bool pattern::op::WrapType::match_value(Matcher* matcher, const Output& pattern_value, const Output& graph_value) { - if (graph_value.get_node_shared_ptr()->get_type_info().is_castable(get_wrapped_type()) && + if (std::any_of(m_wrapped_types.begin(), + m_wrapped_types.end(), + [&](const NodeTypeInfo& type_info) { + return graph_value.get_node_shared_ptr()->get_type_info().is_castable( + type_info); + }) && m_predicate(graph_value)) { auto& pattern_map = matcher->get_pattern_value_map(); @@ -44,3 +49,17 @@ bool pattern::op::WrapType::match_value(Matcher* matcher, } return false; } + +NodeTypeInfo pattern::op::WrapType::get_wrapped_type() const +{ + if (m_wrapped_types.size() > 1) + { + throw ngraph::ngraph_error("get_wrapped_type() called on WrapType with more than one type"); + } + return m_wrapped_types.at(0); +} + +const std::vector& pattern::op::WrapType::get_wrapped_types() const +{ + return m_wrapped_types; +} \ No newline at end of file diff --git a/ngraph/core/src/runtime/host_tensor.cpp b/ngraph/core/src/runtime/host_tensor.cpp index 2c5de03136e8ed..da996869442eb9 100644 --- a/ngraph/core/src/runtime/host_tensor.cpp +++ b/ngraph/core/src/runtime/host_tensor.cpp @@ -65,10 +65,12 @@ runtime::HostTensor::HostTensor(const std::string& name) { } +NGRAPH_SUPPRESS_DEPRECATED_START runtime::HostTensor::HostTensor(const Output& value) : HostTensor(value.get_element_type(), value.get_partial_shape(), value.get_tensor().get_name()) { } +NGRAPH_SUPPRESS_DEPRECATED_END void runtime::HostTensor::allocate_buffer() { @@ -101,11 +103,13 @@ void runtime::HostTensor::allocate_buffer() } } +NGRAPH_SUPPRESS_DEPRECATED_START runtime::HostTensor::HostTensor(const std::shared_ptr& constant) : HostTensor(constant->output(0).get_tensor().get_name()) { initialize(constant); } +NGRAPH_SUPPRESS_DEPRECATED_END void runtime::HostTensor::initialize(const std::shared_ptr& constant) { diff --git a/ngraph/core/src/runtime/tensor.cpp b/ngraph/core/src/runtime/tensor.cpp index e5da131c7f3781..21e9c328a24d16 100644 --- a/ngraph/core/src/runtime/tensor.cpp +++ b/ngraph/core/src/runtime/tensor.cpp @@ -49,7 +49,9 @@ size_t runtime::Tensor::get_size_in_bytes() const const std::string& 
runtime::Tensor::get_name() const { + NGRAPH_SUPPRESS_DEPRECATED_START return m_descriptor->get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END } bool runtime::Tensor::get_stale() const diff --git a/ngraph/core/src/validation_util.cpp b/ngraph/core/src/validation_util.cpp index 1fded1643095da..a31b612f4c073f 100644 --- a/ngraph/core/src/validation_util.cpp +++ b/ngraph/core/src/validation_util.cpp @@ -15,6 +15,10 @@ //***************************************************************************** #include +#include +#include +#include +#include #include "ngraph/evaluator.hpp" #include "ngraph/op/concat.hpp" @@ -1194,3 +1198,380 @@ void ngraph::evaluate_nodes(std::map& value_map, evaluator.evaluate(value); } } + +bool could_propagate(const Output& output, std::vector& order) +{ + bool status = true; + + std::deque nodes_to_calculate = {output.get_node()}; + order.push_back(output.get_node()); + + while (status && !nodes_to_calculate.empty()) + { + auto current_node = nodes_to_calculate.front(); + nodes_to_calculate.pop_front(); + + if (current_node->inputs().empty() && !is_type(current_node)) + status = false; + else if (!is_type(current_node) && !is_type(current_node)) + { + // not a leaf, not a shape_of -- continue to search + for (const auto& input_value : current_node->input_values()) + { + const auto& input_node = input_value.get_node(); + order.push_back(input_node); + nodes_to_calculate.push_front(input_node); + } + } + } + return status; +} + +HostTensorPtr evaluate_bound(const Output& output, bool is_upper) +{ + // bound is already set in the tensor + if (is_upper && output.get_tensor().get_upper_value() != nullptr) + return output.get_tensor().get_upper_value(); + if (!is_upper && output.get_tensor().get_lower_value() != nullptr) + return output.get_tensor().get_lower_value(); + + std::vector order; + if (could_propagate(output, order)) + { + reverse(order.begin(), order.end()); + for (const auto& node : order) + { + HostTensorVector outputs; + for (const auto& out : node->outputs()) + outputs.push_back(std::make_shared(out)); + if (is_upper ? node->evaluate_upper(outputs) : node->evaluate_lower(outputs)) + { + const auto& input_values = node->input_values(); + bool same_inputs = std::all_of( + input_values.begin(), input_values.end(), [](const Output& input) { + return input.get_tensor().has_and_set_bound(); + }); + for (size_t i = 0; i < outputs.size(); ++i) + { + // TODO: should we skip setting value for tensors that have only one consumer? 
+ if ((same_inputs || is_upper) && + node->get_output_tensor(i).get_upper_value() == nullptr) + node->get_output_tensor(i).set_upper_value(outputs[i]); + if ((same_inputs || !is_upper) && + node->get_output_tensor(i).get_lower_value() == nullptr) + node->get_output_tensor(i).set_lower_value(outputs[i]); + } + for (const auto& input : input_values) + if (input.get_target_inputs().size() == 1) + input.get_tensor().invalidate_values(); + } + else + { + break; + } + } + } + if (is_upper) + return output.get_tensor().get_upper_value(); + else + return output.get_tensor().get_lower_value(); +} + +HostTensorPtr ngraph::evaluate_lower_bound(const Output& output) +{ + return evaluate_bound(output, false); +} + +HostTensorPtr ngraph::evaluate_upper_bound(const Output& output) +{ + return evaluate_bound(output, true); +} + +pair ngraph::evaluate_both_bounds(const Output& output) +{ + return {evaluate_lower_bound(output), evaluate_upper_bound(output)}; +} + +bool ngraph::evaluate_as_partial_shape(const Output& output, PartialShape& pshape) +{ + HostTensorPtr lb, ub; + std::tie(lb, ub) = evaluate_both_bounds(output); + bool shape_defined = false; + if (lb && ub) + { + const auto lower_bound = std::make_shared(lb)->cast_vector(); + const auto upper_bound = std::make_shared(ub)->cast_vector(); + NGRAPH_CHECK(lower_bound.size() == upper_bound.size()); + vector resulting_pshape(lower_bound.size()); + for (size_t i = 0; i < lower_bound.size(); ++i) + { + NGRAPH_CHECK(lower_bound[i] >= 0 && upper_bound[i] >= 0); + resulting_pshape[i] = {lower_bound[i], upper_bound[i]}; + } + pshape = PartialShape(resulting_pshape); + shape_defined = true; + } + return shape_defined; +} + +bool default_bound_evaluator(const Node* node, const HostTensorVector& output_values, bool is_upper) +{ + HostTensorVector input_tensors; + for (const auto& input : node->input_values()) + { + if (auto bound = is_upper ? 
input.get_tensor().get_upper_value() + : input.get_tensor().get_lower_value()) + input_tensors.push_back(bound); + else + return false; + } + return node->evaluate(output_values, input_tensors); +} + +bool ngraph::default_lower_bound_evaluator(const Node* node, const HostTensorVector& output_values) +{ + return default_bound_evaluator(node, output_values, false); +} + +bool ngraph::default_upper_bound_evaluator(const Node* node, const HostTensorVector& output_values) +{ + return default_bound_evaluator(node, output_values, true); +} + +shared_ptr ngraph::get_constant_max_of_type(element::Type_t t) +{ +#define NGRAPH_TYPE_TO_MAX_CONST(t) \ + case t: \ + return op::Constant::create( \ + t, {}, {std::numeric_limits::value_type>::max()}); \ + break + + switch (t) + { + NGRAPH_TYPE_TO_MAX_CONST(element::boolean); + NGRAPH_TYPE_TO_MAX_CONST(element::bf16); + NGRAPH_TYPE_TO_MAX_CONST(element::f16); + NGRAPH_TYPE_TO_MAX_CONST(element::f32); + NGRAPH_TYPE_TO_MAX_CONST(element::f64); + NGRAPH_TYPE_TO_MAX_CONST(element::i8); + NGRAPH_TYPE_TO_MAX_CONST(element::i16); + NGRAPH_TYPE_TO_MAX_CONST(element::i32); + NGRAPH_TYPE_TO_MAX_CONST(element::i64); + NGRAPH_TYPE_TO_MAX_CONST(element::u1); + NGRAPH_TYPE_TO_MAX_CONST(element::u8); + NGRAPH_TYPE_TO_MAX_CONST(element::u16); + NGRAPH_TYPE_TO_MAX_CONST(element::u32); + NGRAPH_TYPE_TO_MAX_CONST(element::u64); + + case element::undefined: + case element::dynamic: + default: return nullptr; + } +} + +shared_ptr ngraph::get_constant_min_of_type(element::Type_t t) +{ +#define NGRAPH_TYPE_TO_MIN_CONST(t) \ + case t: \ + return op::Constant::create( \ + t, {}, {std::numeric_limits::value_type>::min()}); \ + break + + switch (t) + { + NGRAPH_TYPE_TO_MIN_CONST(element::boolean); + NGRAPH_TYPE_TO_MIN_CONST(element::bf16); + NGRAPH_TYPE_TO_MIN_CONST(element::f16); + NGRAPH_TYPE_TO_MIN_CONST(element::f32); + NGRAPH_TYPE_TO_MIN_CONST(element::f64); + NGRAPH_TYPE_TO_MIN_CONST(element::i8); + NGRAPH_TYPE_TO_MIN_CONST(element::i16); + NGRAPH_TYPE_TO_MIN_CONST(element::i32); + NGRAPH_TYPE_TO_MIN_CONST(element::i64); + NGRAPH_TYPE_TO_MIN_CONST(element::u1); + NGRAPH_TYPE_TO_MIN_CONST(element::u8); + NGRAPH_TYPE_TO_MIN_CONST(element::u16); + NGRAPH_TYPE_TO_MIN_CONST(element::u32); + NGRAPH_TYPE_TO_MIN_CONST(element::u64); + + case element::undefined: + case element::dynamic: + default: return nullptr; + } +} + +HostTensorPtr equality_mask(const HostTensorPtr& tensor, const shared_ptr& constant) +{ + auto mask = std::make_shared(element::boolean, tensor->get_shape()); + const auto& param = + std::make_shared(tensor->get_element_type(), tensor->get_shape()); + op::v1::Equal(param, constant, ngraph::op::AutoBroadcastSpec::NUMPY) + .evaluate({mask}, {tensor, std::make_shared(constant)}); + return mask; +} + +HostTensorPtr or_tensor(const HostTensorPtr& lhs, const HostTensorPtr& rhs) +{ + auto result = std::make_shared(element::boolean, lhs->get_shape()); + op::v1::LogicalOr(std::make_shared(lhs->get_element_type(), lhs->get_shape()), + std::make_shared(rhs->get_element_type(), rhs->get_shape()), + ngraph::op::AutoBroadcastSpec::NUMPY) + .evaluate({result}, {lhs, rhs}); + return result; +} + +bool ngraph::interval_bound_evaluator(const Node* node, + const HostTensorVector& lower_output_values, + const HostTensorVector& upper_output_values) +{ + // TODO: relax for n inputs ? 
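+    // Evaluates the node on all four combinations of the two inputs' lower/upper bounds,
+    // stacks the per-combination results along a new leading axis, and reduces them with
+    // ReduceMin / ReduceMax to obtain the output lower and upper bounds. Positions where an
+    // input bound equals the maximum of its type are treated as fully dynamic and masked to
+    // the type maximum (upper bound) or zero (lower bound) via Select.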
+ NGRAPH_CHECK(lower_output_values.size() == upper_output_values.size()); + NGRAPH_CHECK(node->get_input_size() == 2); + + const auto num_of_outputs = node->get_output_size(); + std::shared_ptr low_0 = evaluate_lower_bound(node->get_input_source_output(0)); + std::shared_ptr low_1 = evaluate_lower_bound(node->get_input_source_output(1)); + std::shared_ptr up_0 = evaluate_upper_bound(node->get_input_source_output(0)); + std::shared_ptr up_1 = evaluate_upper_bound(node->get_input_source_output(1)); + std::set input_variants = { + {low_0, low_1}, {low_0, up_1}, {up_0, low_1}, {up_0, up_1}}; + + for (const auto& variant_of_input_vector : input_variants) + for (const auto& input_tensor : variant_of_input_vector) + if (input_tensor == nullptr) + return false; + + if (input_variants.size() == 1) + return node->evaluate(upper_output_values, *input_variants.begin()) && + node->evaluate(lower_output_values, *input_variants.begin()); + + auto zero = op::v0::Constant::create(element::i64, {1}, {0}); + std::vector unsqueezed_output_variants; + for (auto& input_variant : input_variants) + { + HostTensorVector vector_of_output_variants; + for (const auto& output : lower_output_values) + vector_of_output_variants.push_back(std::make_shared( + output->get_element_type(), output->get_partial_shape())); + + node->evaluate(vector_of_output_variants, input_variant); + + HostTensorVector vector_of_unsqueezed_output_variants; + for (const auto& output : vector_of_output_variants) + { + if (!output) + return false; + auto unsqueezed_shape = output->get_shape(); + unsqueezed_shape.insert(unsqueezed_shape.begin(), 1); + const auto unsqueezed = + make_shared(output->get_element_type(), unsqueezed_shape); + op::v0::Unsqueeze().evaluate({unsqueezed}, {output, make_shared(zero)}); + vector_of_unsqueezed_output_variants.push_back(unsqueezed); + } + unsqueezed_output_variants.push_back(vector_of_unsqueezed_output_variants); + } + + auto input_0_maximum_value = get_constant_max_of_type(low_0->get_element_type()); + auto input_1_maximum_value = get_constant_max_of_type(low_1->get_element_type()); + if (input_0_maximum_value == nullptr || input_1_maximum_value == nullptr) + return false; + + auto input_0_low_dyn_mask = equality_mask(low_0, input_0_maximum_value); + auto input_0_up_dyn_mask = equality_mask(up_0, input_0_maximum_value); + auto input_1_low_dyn_mask = equality_mask(low_1, input_1_maximum_value); + auto input_1_up_dyn_mask = equality_mask(up_1, input_1_maximum_value); + + auto final_input_dyn_mask = or_tensor(or_tensor(input_0_low_dyn_mask, input_0_up_dyn_mask), + or_tensor(input_1_low_dyn_mask, input_1_up_dyn_mask)); + + bool fully_defined = true; + for (size_t i = 0; i < num_of_outputs; ++i) + { + HostTensorVector all_variants_for_ith_output; + for (const auto& unsqueezed_output_variant : unsqueezed_output_variants) + all_variants_for_ith_output.push_back(unsqueezed_output_variant[i]); + + auto concated_shape = all_variants_for_ith_output[0]->get_shape(); + concated_shape[0] = all_variants_for_ith_output.size(); + auto concated = make_shared(all_variants_for_ith_output[0]->get_element_type(), + concated_shape); + auto concat = op::Concat(); + concat.set_axis(0); + concat.evaluate({concated}, all_variants_for_ith_output); + + auto fake_param = make_shared( + all_variants_for_ith_output[0]->get_element_type(), concated_shape); + auto reduce_min_op = op::v1::ReduceMin(fake_param, zero, false); + reduce_min_op.evaluate({lower_output_values[i]}, {concated, make_shared(zero)}); + auto reduce_max_op = 
op::v1::ReduceMax(fake_param, zero, false); + reduce_max_op.evaluate({upper_output_values[i]}, {concated, make_shared(zero)}); + + if (upper_output_values[i] == nullptr) + fully_defined = false; + else + { + auto output_maximum_value = + get_constant_max_of_type(upper_output_values[i]->get_element_type()); + op::v1::Select().evaluate({upper_output_values[i]}, + {final_input_dyn_mask, + std::make_shared(output_maximum_value), + upper_output_values[i]}); + node->get_output_tensor(i).set_upper_value(upper_output_values[i]); + } + if (lower_output_values[i] == nullptr) + fully_defined = false; + else + { + auto output_minimum_value = + op::Constant::create(lower_output_values[i]->get_element_type(), {}, {0}); + // Can not set to get_constant_min_of_type(lower_output_values[i]->get_element_type()) + // yet + op::v1::Select().evaluate({lower_output_values[i]}, + {final_input_dyn_mask, + std::make_shared(output_minimum_value), + lower_output_values[i]}); + node->get_output_tensor(i).set_lower_value(lower_output_values[i]); + } + } + return fully_defined; +} + +bool ngraph::host_tensor_is_positive(const HostTensorPtr& bound) +{ + const auto bound_constant = std::make_shared(bound); + const auto zero_constant = op::Constant::create(bound->get_element_type(), {1}, {0}); + OutputVector greater(1); + bool folded = std::make_shared(bound_constant, zero_constant) + ->constant_fold(greater, {bound_constant, zero_constant}); + NGRAPH_CHECK(folded); + + auto axes_vector = std::vector(greater[0].get_shape().size()); + std::iota(axes_vector.begin(), axes_vector.end(), 0); + const auto axes = op::Constant::create(element::i64, {axes_vector.size()}, axes_vector); + OutputVector all(1); + folded = std::make_shared(greater[0], axes) + ->constant_fold(all, {greater[0], axes}); + NGRAPH_CHECK(folded && is_type(all[0].get_node_shared_ptr())); + const auto result = + std::dynamic_pointer_cast(all[0].get_node_shared_ptr())->cast_vector(); + NGRAPH_CHECK(all[0].get_shape() == Shape{}); + return result[0]; +} + +bool ngraph::has_and_set_equal_bounds(const Output& source) +{ + if (op::is_constant(source.get_node_shared_ptr())) + return true; + HostTensorPtr lb, ub; + std::tie(lb, ub) = evaluate_both_bounds(source); + return lb && lb == ub; +} + +shared_ptr ngraph::get_constant_from_source(const Output& source) +{ + if (!has_and_set_equal_bounds(source)) + return nullptr; + if (const auto& c = as_type_ptr(source.get_node_shared_ptr())) + return c; + return std::make_shared(source.get_tensor().get_upper_value()); +} \ No newline at end of file diff --git a/ngraph/frontend/onnx_import/src/core/graph.cpp b/ngraph/frontend/onnx_import/src/core/graph.cpp index 278dfad457cc55..a9543e440e51ad 100644 --- a/ngraph/frontend/onnx_import/src/core/graph.cpp +++ b/ngraph/frontend/onnx_import/src/core/graph.cpp @@ -20,6 +20,7 @@ #include #include "core/graph.hpp" +#include "core/null_node.hpp" #include "exceptions.hpp" #include "ngraph/log.hpp" #include "ngraph/node.hpp" @@ -272,7 +273,21 @@ namespace ngraph break; } - ng_node_vector[i].get_node()->set_friendly_name(onnx_node.output(i)); + auto onnx_node_name = onnx_node.get_name(); + if (onnx_node_name.empty()) + { + ng_node_vector[i].get_node()->set_friendly_name(onnx_node.output(i)); + } + else + { + ng_node_vector[i].get_node()->set_friendly_name(onnx_node.get_name()); + } + + // null node does not have tensor + if (!ngraph::op::is_null(ng_node_vector[i])) + { + ng_node_vector[i].get_tensor().set_names({onnx_node.output(i)}); + } } } diff --git 
a/ngraph/frontend/onnx_import/src/core/value_info.hpp b/ngraph/frontend/onnx_import/src/core/value_info.hpp index 10b84851f06623..961b4357c01b1e 100644 --- a/ngraph/frontend/onnx_import/src/core/value_info.hpp +++ b/ngraph/frontend/onnx_import/src/core/value_info.hpp @@ -104,6 +104,7 @@ namespace ngraph auto parameter = std::make_shared(get_element_type(), get_shape()); parameter->set_friendly_name(get_name()); + parameter->get_output_tensor(0).set_names({get_name()}); return parameter; } diff --git a/ngraph/frontend/onnx_import/src/default_opset.hpp b/ngraph/frontend/onnx_import/src/default_opset.hpp index 551406a976fd96..f6c4eb75792a80 100644 --- a/ngraph/frontend/onnx_import/src/default_opset.hpp +++ b/ngraph/frontend/onnx_import/src/default_opset.hpp @@ -1,9 +1,9 @@ -#include "ngraph/opsets/opset5.hpp" +#include "ngraph/opsets/opset6.hpp" namespace ngraph { namespace onnx_import { - namespace default_opset = ngraph::opset5; + namespace default_opset = ngraph::opset6; } } diff --git a/ngraph/frontend/onnx_import/src/op/gather_elements.hpp b/ngraph/frontend/onnx_import/src/op/gather_elements.hpp index 4e7ea62cefb524..c23b31dd1d3ed8 100644 --- a/ngraph/frontend/onnx_import/src/op/gather_elements.hpp +++ b/ngraph/frontend/onnx_import/src/op/gather_elements.hpp @@ -16,6 +16,7 @@ #pragma once +#include "default_opset.hpp" #include "ngraph/output_vector.hpp" namespace ngraph @@ -33,7 +34,7 @@ namespace ngraph auto indices = ng_inputs.at(1); auto axis = node.get_attribute_value("axis", 0); - return {std::make_shared(data, indices, axis)}; + return {std::make_shared(data, indices, axis)}; } } // namespace set_1 } // namespace op diff --git a/ngraph/frontend/onnx_import/src/op/instance_norm.cpp b/ngraph/frontend/onnx_import/src/op/instance_norm.cpp index 3594e12c1f1a08..1eca715526d834 100644 --- a/ngraph/frontend/onnx_import/src/op/instance_norm.cpp +++ b/ngraph/frontend/onnx_import/src/op/instance_norm.cpp @@ -93,7 +93,8 @@ namespace ngraph const auto reduction_axes = common::get_monotonic_range_along_node_rank(data, 2); - auto mvn = std::make_shared(data, false, true, epsilon); + auto mvn = std::make_shared( + data, reduction_axes, true, epsilon, ngraph::op::MVNEpsMode::INSIDE_SQRT); std::shared_ptr data_shape_node; if (data_pshape.is_static()) diff --git a/ngraph/frontend/onnx_import/src/op/loop.cpp b/ngraph/frontend/onnx_import/src/op/loop.cpp index eec315049d5eef..77ba8186b2af57 100644 --- a/ngraph/frontend/onnx_import/src/op/loop.cpp +++ b/ngraph/frontend/onnx_import/src/op/loop.cpp @@ -175,7 +175,7 @@ namespace ngraph body_inputs[0]); // current iteration body input const auto body = std::make_shared(body_outputs, body_params); auto loop = std::make_shared(trip_count, termination_cond); - ngraph::opset5::Loop::SpecialBodyPorts spec_ports{0, 0}; + default_opset::Loop::SpecialBodyPorts spec_ports{0, 0}; loop->set_special_body_ports(spec_ports); loop->set_function(body); diff --git a/ngraph/frontend/onnx_import/src/op/mean_variance_normalization.cpp b/ngraph/frontend/onnx_import/src/op/mean_variance_normalization.cpp index 55abf537ba9852..5967a89e76ed8f 100644 --- a/ngraph/frontend/onnx_import/src/op/mean_variance_normalization.cpp +++ b/ngraph/frontend/onnx_import/src/op/mean_variance_normalization.cpp @@ -19,6 +19,7 @@ #include "default_opset.hpp" #include "ngraph/axis_set.hpp" #include "ngraph/op/mvn.hpp" +#include "ngraph/opsets/opset5.hpp" #include "ngraph/validation_util.hpp" #include "op/mean_variance_normalization.hpp" @@ -38,7 +39,7 @@ namespace ngraph bool normalize_variance = 
node.get_attribute_value("normalize_variance", 1); - return {std::make_shared( + return {std::make_shared( data, across_channels, normalize_variance)}; } @@ -49,11 +50,14 @@ namespace ngraph OutputVector mean_variance_normalization(const Node& node) { auto data = node.get_ng_inputs().at(0); - auto axes = node.get_attribute_value>("axes", {0, 2, 3}); + auto axes = + node.get_attribute_value>("axes", {0, 2, 3}); const std::vector normalized_axes = ngraph::normalize_axes( node.get_description(), axes, data.get_partial_shape().rank()); - - return {std::make_shared(data, AxisSet(normalized_axes))}; + auto const_axes = default_opset::Constant::create( + element::i64, Shape{normalized_axes.size()}, normalized_axes); + return {std::make_shared( + data, const_axes, true, 1e-09, ngraph::op::MVNEpsMode::OUTSIDE_SQRT)}; } } // namespace set_9 diff --git a/ngraph/frontend/onnx_import/src/op/org.openvinotoolkit/group_norm.cpp b/ngraph/frontend/onnx_import/src/op/org.openvinotoolkit/group_norm.cpp index 9119866fc8378c..24a795bcc57727 100644 --- a/ngraph/frontend/onnx_import/src/op/org.openvinotoolkit/group_norm.cpp +++ b/ngraph/frontend/onnx_import/src/op/org.openvinotoolkit/group_norm.cpp @@ -19,6 +19,7 @@ #include "ngraph/builder/reduce_ops.hpp" #include "ngraph/builder/split.hpp" #include "ngraph/node.hpp" +#include "ngraph/opsets/opset5.hpp" #include "onnx_import/core/node.hpp" #include "utils/common.hpp" #include "utils/reshape.hpp" @@ -84,7 +85,7 @@ namespace ngraph data, detail::create_group_norm_shape(data, num_groups), true); auto mvn = - std::make_shared(data_reshaped, false, true, eps); + std::make_shared(data_reshaped, false, true, eps); std::shared_ptr result = std::make_shared(mvn, data_shape_node, true); diff --git a/ngraph/python/requirements_test.txt b/ngraph/python/requirements_test.txt index 0126af5d7abc83..0536bb70b2d955 100644 --- a/ngraph/python/requirements_test.txt +++ b/ngraph/python/requirements_test.txt @@ -2,7 +2,7 @@ flake8==3.8.4 flake8-comprehensions==3.3.0 flake8-docstrings==1.5.0 flake8-quotes==3.2.0 -onnx==1.8.0 +onnx==1.8.1 pydocstyle==5.1.1 pytest==6.1.2 retrying==1.3.3 diff --git a/ngraph/python/src/ngraph/__init__.py b/ngraph/python/src/ngraph/__init__.py index b779e49228d394..bc41e932e54eb4 100644 --- a/ngraph/python/src/ngraph/__init__.py +++ b/ngraph/python/src/ngraph/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! ngraph module namespace, exposing factory functions for all ops and other classes.""" +"""ngraph module namespace, exposing factory functions for all ops and other classes.""" # noqa: F401 from pkg_resources import get_distribution, DistributionNotFound @@ -56,6 +56,7 @@ from ngraph.opset6 import cos from ngraph.opset6 import cosh from ngraph.opset6 import ctc_greedy_decoder +from ngraph.opset6 import ctc_greedy_decoder_seq_len from ngraph.opset6 import ctc_loss from ngraph.opset6 import cum_sum from ngraph.opset6 import cum_sum as cumsum diff --git a/ngraph/python/src/ngraph/exceptions.py b/ngraph/python/src/ngraph/exceptions.py index 09d3904c21d6dd..704c6925e7cf7e 100644 --- a/ngraph/python/src/ngraph/exceptions.py +++ b/ngraph/python/src/ngraph/exceptions.py @@ -13,16 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! 
ngraph exceptions hierarchy. All exceptions are descendants of NgraphError.""" +"""ngraph exceptions hierarchy. All exceptions are descendants of NgraphError.""" class NgraphError(Exception): - """! Base class for Ngraph exceptions.""" + """Base class for Ngraph exceptions.""" class UserInputError(NgraphError): - """! User provided unexpected input.""" + """User provided unexpected input.""" class NgraphTypeError(NgraphError, TypeError): - """! Type mismatch error.""" + """Type mismatch error.""" diff --git a/ngraph/python/src/ngraph/helpers.py b/ngraph/python/src/ngraph/helpers.py index 7b3f6447fb5884..8c7a7ef6600bfe 100644 --- a/ngraph/python/src/ngraph/helpers.py +++ b/ngraph/python/src/ngraph/helpers.py @@ -13,14 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! nGraph helper functions.""" +"""nGraph helper functions.""" from ngraph.impl import Function from openvino.inference_engine import IENetwork def function_from_cnn(cnn_network: IENetwork) -> Function: - """! Get nGraph function from Inference Engine CNN network.""" + """Get nGraph function from Inference Engine CNN network.""" capsule = cnn_network._get_function_capsule() ng_function = Function.from_capsule(capsule) return ng_function diff --git a/ngraph/python/src/ngraph/impl/op/__init__.py b/ngraph/python/src/ngraph/impl/op/__init__.py index 0baae3aea51fbb..159563c034e3e2 100644 --- a/ngraph/python/src/ngraph/impl/op/__init__.py +++ b/ngraph/python/src/ngraph/impl/op/__init__.py @@ -24,7 +24,7 @@ from _pyngraph.op import Constant -""" Retrieve Constant inner data. +"""Retrieve Constant inner data. Internally uses PyBind11 Numpy's buffer protocol. diff --git a/ngraph/python/src/ngraph/opset1/ops.py b/ngraph/python/src/ngraph/opset1/ops.py index 3156397e193667..d37d1ba840fa9e 100644 --- a/ngraph/python/src/ngraph/opset1/ops.py +++ b/ngraph/python/src/ngraph/opset1/ops.py @@ -14,7 +14,7 @@ # limitations under the License. # ****************************************************************************** -"""! Factory functions for all ngraph ops.""" +"""Factory functions for all ngraph ops.""" from typing import Callable, Iterable, List, Optional, Set, Union import numpy as np @@ -60,7 +60,7 @@ @unary_op def absolute(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies f(x) = abs(x) to the input node element-wise. + """Return node which applies f(x) = abs(x) to the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -71,7 +71,7 @@ def absolute(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def acos(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply inverse cosine function on the input node element-wise. + """Apply inverse cosine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -87,7 +87,7 @@ def add( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies f(x) = A+B to the input nodes element-wise.""" + """Return node which applies f(x) = A+B to the input nodes element-wise.""" return _get_node_factory_opset1().create( "Add", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} ) @@ -95,7 +95,7 @@ def add( @unary_op def asin(node: NodeInput, name: Optional[str] = None) -> Node: - """! 
Apply inverse sine function on the input node element-wise. + """Apply inverse sine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -106,7 +106,7 @@ def asin(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def atan(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply inverse tangent function on the input node element-wise. + """Apply inverse tangent function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -127,7 +127,7 @@ def avg_pool( auto_pad: Optional[str] = None, name: Optional[str] = None, ) -> Node: - """! Return average pooling node. + """Return average pooling node. @param data_batch: The input node providing data. @param strides: The window movement strides. @@ -170,7 +170,7 @@ def batch_norm_inference( epsilon: float, name: Optional[str] = None, ) -> Node: - """! Perform layer normalizes a input tensor by mean and variance with appling scale and offset. + """Perform layer normalizes a input tensor by mean and variance with appling scale and offset. @param data: The input tensor with data for normalization. @param gamma: The scalar scaling for normalized value. @@ -199,7 +199,7 @@ def binary_convolution( auto_pad: str = "EXPLICIT", name: Optional[str] = None, ) -> Node: - """! Create node performing convolution with binary weights, binary input and integer output. + """Create node performing convolution with binary weights, binary input and integer output. @param data: The node providing data batch tensor. @param filter: The node providing filters tensor. @@ -236,7 +236,7 @@ def broadcast( mode: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Create a node which broadcasts the input node's values along specified axes to a desired shape. + """Create a node which broadcasts the input node's values along specified axes to a desired shape. @param data: The node with input tensor data. @param target_shape: The node with a new shape we want to broadcast tensor to. @@ -262,7 +262,7 @@ def ctc_greedy_decoder( merge_repeated: bool = True, name: Optional[str] = None, ) -> Node: - """! Perform greedy decoding on the logits given in input (best path). + """Perform greedy decoding on the logits given in input (best path). @param data: Logits on which greedy decoding is performed. @param sequence_mask: The tensor with sequence masks for each sequence in the batch. @@ -278,7 +278,7 @@ def ctc_greedy_decoder( @unary_op def ceiling(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies ceiling to the input node element-wise. + """Return node which applies ceiling to the input node element-wise. @param node: The node providing data to ceiling operation. @param name: Optional name for output node. @@ -291,7 +291,13 @@ def ceiling(node: NodeInput, name: Optional[str] = None) -> Node: def clamp( data: NodeInput, min_value: ScalarData, max_value: ScalarData, name: Optional[str] = None ) -> Node: - """! Perform clamp element-wise on data from input node. + """Perform clamp element-wise on data from input node. + + @param data: Input tensor. One of: input node, array or scalar. + @param min_value: The lower bound of the range. Scalar value. + @param max_value: The upper bound of the range. Scalar value. + @param name: Optional output node name. + @return The new node performing a clamp operation on its input data element-wise. 
Performs a clipping operation on an input value between a pair of boundary values. @@ -302,18 +308,12 @@ def clamp( Clamp uses the following logic: - ~~~~~~~~~~~~~~~~~~~~~~~~{.py} + @code{.py} if data < min_value: data=min_value elif data > max_value: data=max_value - ~~~~~~~~~~~~~~~~~~~~~~~~ - - @param data: Input tensor. One of: input node, array or scalar. - @param min_value: The lower bound of the range. Scalar value. - @param max_value: The upper bound of the range. Scalar value. - @param name: Optional output node name. - @return The new node performing a clamp operation on its input data element-wise. + @endcode """ return _get_node_factory_opset1().create( "Clamp", [as_node(data)], {"min": min_value, "max": max_value} @@ -322,7 +322,7 @@ def clamp( @nameable_op def concat(nodes: List[NodeInput], axis: int, name: Optional[str] = None) -> Node: - """! Concatenate input nodes into single new node along specified axis. + """Concatenate input nodes into single new node along specified axis. @param nodes: The nodes we want concatenate into single new node. @param axis: The axis along which we want to concatenate input nodes. @@ -334,7 +334,7 @@ def concat(nodes: List[NodeInput], axis: int, name: Optional[str] = None) -> Nod @nameable_op def constant(value: NumericData, dtype: NumericType = None, name: Optional[str] = None) -> Constant: - """! Create a Constant node from provided value. + """Create a Constant node from provided value. @param value: One of: array of values or scalar to initialize node with. @param dtype: The data type of provided data. @@ -348,7 +348,7 @@ def constant(value: NumericData, dtype: NumericType = None, name: Optional[str] def convert( data: NodeInput, destination_type: Union[str, NumericType], name: Optional[str] = None ) -> Node: - """! Return node which casts input node values to specified type. + """Return node which casts input node values to specified type. @param data: Node which produces the input tensor. @param destination_type: Provides the target type for the conversion. @@ -364,7 +364,7 @@ def convert( @binary_op def convert_like(data: NodeInput, like: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which casts data node values to the type of another node. + """Return node which casts data node values to the type of another node. @param data: Node which produces the input tensor @param like: Node which provides the target type information for the conversion @@ -385,7 +385,7 @@ def convolution( auto_pad: str = "EXPLICIT", name: Optional[str] = None, ) -> Node: - """! Return node performing batched convolution operation. + """Return node performing batched convolution operation. @param data: The node providing data batch tensor. @param filter: The node providing filters tensor. @@ -423,7 +423,7 @@ def convolution_backprop_data( output_padding: Optional[List[int]] = None, name: Optional[str] = None, ) -> Node: - """! Create node performing a batched-convolution backprop data operation. + """Create node performing a batched-convolution backprop data operation. @param data: The node producing data from forward-prop @param filters: The node producing the filters from forward-prop. @@ -469,7 +469,7 @@ def convolution_backprop_data( @unary_op def cos(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply cosine function on the input node element-wise. + """Apply cosine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. 
@@ -480,7 +480,7 @@ def cos(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def cosh(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply hyperbolic cosine function on the input node element-wise. + """Apply hyperbolic cosine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -503,7 +503,7 @@ def deformable_convolution( deformable_group: int = 1, name: Optional[str] = None, ) -> Node: - """! Create node performing deformable convolution. + """Create node performing deformable convolution. @param data: The node providing data batch tensor. @param filter: The node providing filters tensor. @@ -548,7 +548,7 @@ def deformable_psroi_pooling( offsets: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return node performing DeformablePSROIPooling operation. + """Return node performing DeformablePSROIPooling operation. DeformablePSROIPooling computes position-sensitive pooling on regions of interest specified by input. @@ -589,7 +589,7 @@ def deformable_psroi_pooling( @nameable_op def depth_to_space(node: Node, mode: str, block_size: int = 1, name: str = None) -> Node: - """! Rearranges input tensor from depth into blocks of spatial data. + """Rearranges input tensor from depth into blocks of spatial data. Values from the height and width dimensions are moved to the depth dimension. @@ -626,7 +626,7 @@ def detection_output( aux_box_preds: NodeInput = None, name: Optional[str] = None, ) -> Node: - """! Generate the detection output using information on location and confidence predictions. + """Generate the detection output using information on location and confidence predictions. @param box_logits: The 2D input tensor with box logits. @param class_preds: The 2D input tensor with class predictions. @@ -635,6 +635,7 @@ def detection_output( @param aux_class_preds: The 2D input tensor with additional class predictions information. @param aux_box_preds: The 2D input tensor with additional box predictions information. @param name: Optional name for the output node. + @return Node representing DetectionOutput operation. Available attributes are: @@ -726,7 +727,7 @@ def detection_output( Required: no Example of attribute dictionary: - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.py} + @code{.py} # just required ones attrs = { 'num_classes': 85, @@ -743,11 +744,9 @@ def detection_output( 'input_height': [32], 'input_width': [32], } - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. - - @return Node representing DetectionOutput operation. """ requirements = [ ("num_classes", True, np.integer, is_positive_value), @@ -786,7 +785,7 @@ def divide( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies f(x) = A/B to the input nodes element-wise. + """Return node which applies f(x) = A/B to the input nodes element-wise. @param left_node: The node providing dividend data. @param right_node: The node providing divisor data. @@ -801,7 +800,7 @@ def divide( @nameable_op def elu(data: NodeInput, alpha: NumericType, name: Optional[str] = None) -> Node: - """! Perform Exponential Linear Unit operation element-wise on data from input node. + """Perform Exponential Linear Unit operation element-wise on data from input node. Computes exponential linear: alpha * (exp(data) - 1) if < 0, data otherwise. 
@@ -823,7 +822,7 @@ def equal( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if input nodes are equal element-wise. + """Return node which checks if input nodes are equal element-wise. @param left_node: The first input node for equal operation. @param right_node: The second input node for equal operation. @@ -839,7 +838,7 @@ def equal( @unary_op def erf(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which calculates Gauss error function element-wise with given tensor. + """Return node which calculates Gauss error function element-wise with given tensor. @param node: The node providing data for operation. @param name: The optional name for new output node. @@ -850,7 +849,7 @@ def erf(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def exp(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies exponential function to the input node element-wise. + """Return node which applies exponential function to the input node element-wise. @param node: The node providing data for operation. @param name: The optional name for new output node. @@ -870,18 +869,28 @@ def fake_quantize( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - r"""! Perform an element-wise linear quantization on input data. + r"""Perform an element-wise linear quantization on input data. + + @param data: The node with data tensor. + @param input_low: The node with the minimum for input values. + @param input_high: The node with the maximum for input values. + @param output_low: The node with the minimum quantized value. + @param output_high: The node with the maximum quantized value. + @param levels: The number of quantization levels. Integer value. + @param auto_broadcast: The type of broadcasting specifies rules used for + auto-broadcasting of input tensors. + @return New node with quantized value. Input floating point values are quantized into a discrete set of floating point values. - ~~~~~~~~~~~~~{.py} + @code{.py} if x <= input_low: output = output_low if x > input_high: output = output_high else: output = fake_quantize(output) - ~~~~~~~~~~~~~ + @endcode Fake quantize uses the following logic: @@ -889,16 +898,6 @@ def fake_quantize( \dfrac{round( \dfrac{data - input\_low}{(input\_high - input\_low)\cdot (levels-1)})} {(levels-1)\cdot (output\_high - output\_low)} + output\_low \f] - - @param data: The node with data tensor. - @param input_low: The node with the minimum for input values. - @param input_high: The node with the maximum for input values. - @param output_low: The node with the minimum quantized value. - @param output_high: The node with the maximum quantized value. - @param levels: The number of quantization levels. Integer value. - @param auto_broadcast: The type of broadcasting specifies rules used for - auto-broadcasting of input tensors. - @return New node with quantized value. """ return _get_node_factory_opset1().create( "FakeQuantize", @@ -909,7 +908,7 @@ def fake_quantize( @unary_op def floor(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies floor to the input node element-wise. + """Return node which applies floor to the input node element-wise. @param node: The input node providing data. @param name: The optional name for new output node. @@ -925,7 +924,7 @@ def floor_mod( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! 
Return node performing element-wise FloorMod (division reminder) with two given tensors. + """Return node performing element-wise FloorMod (division reminder) with two given tensors. @param left_node: The first input node for FloorMod operation. @param right_node: The second input node for FloorMod operation. @@ -942,7 +941,7 @@ def floor_mod( def gather( data: NodeInput, indices: NodeInput, axis: NodeInput, name: Optional[str] = None ) -> Node: - """! Return Gather node which takes slices from axis of data according to indices. + """Return Gather node which takes slices from axis of data according to indices. @param data: The tensor from which slices are gathered. @param indices: Tensor with indexes to gather. @@ -962,13 +961,20 @@ def gather_tree( end_token: NodeInput, name: Optional[str] = None, ) -> Node: - """! Perform GatherTree operation. + """Perform GatherTree operation. + + @param step_ids: The tensor with indices from per each step. + @param parent_idx: The tensor with with parent beam indices. + @param max_seq_len: The tensor with maximum lengths for each sequence in the batch. + @param end_token: The scalar tensor with value of the end marker in a sequence. + @param name: Optional name for output node. + @return The new node performing a GatherTree operation. The GatherTree node generates the complete beams from the indices per each step and the parent beam indices. GatherTree uses the following logic: - ~~~~~~~~~~~~~{.py} + @code{.py} for batch in range(BATCH_SIZE): for beam in range(BEAM_WIDTH): max_sequence_in_beam = min(MAX_TIME, max_seq_len[batch]) @@ -979,15 +985,7 @@ def gather_tree( final_idx[level, batch, beam] = step_idx[level, batch, parent] parent = parent_idx[level, batch, parent] - ~~~~~~~~~~~~~ - - - @param step_ids: The tensor with indices from per each step. - @param parent_idx: The tensor with with parent beam indices. - @param max_seq_len: The tensor with maximum lengths for each sequence in the batch. - @param end_token: The scalar tensor with value of the end marker in a sequence. - @param name: Optional name for output node. - @return The new node performing a GatherTree operation. + @endcode """ node_inputs = as_nodes(step_ids, parent_idx, max_seq_len, end_token) return _get_node_factory_opset1().create("GatherTree", node_inputs) @@ -1000,7 +998,7 @@ def greater( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if left input node is greater than the right node element-wise. + """Return node which checks if left input node is greater than the right node element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1021,7 +1019,7 @@ def greater_equal( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if left node is greater or equal to the right node element-wise. + """Return node which checks if left node is greater or equal to the right node element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1037,7 +1035,7 @@ def greater_equal( def grn(data: Node, bias: float, name: Optional[str] = None) -> Node: - r"""! Perform Global Response Normalization with L2 norm (across channels only). + r"""Perform Global Response Normalization with L2 norm (across channels only). 
Computes GRN operation on channels for input tensor: @@ -1062,7 +1060,7 @@ def group_convolution( auto_pad: str = "EXPLICIT", name: Optional[str] = None, ) -> Node: - """! Perform Group Convolution operation on data from input node. + """Perform Group Convolution operation on data from input node. @param data: The node producing input data. @param filters: The node producing filters data. @@ -1109,7 +1107,7 @@ def group_convolution_backprop_data( output_padding: Optional[List[int]] = None, name: Optional[str] = None, ) -> Node: - """! Perform Group Convolution operation on data from input node. + """Perform Group Convolution operation on data from input node. @param data: The node producing input data. @param filters: The node producing filter data. @@ -1163,19 +1161,19 @@ def group_convolution_backprop_data( @nameable_op def hard_sigmoid(data: Node, alpha: NodeInput, beta: NodeInput, name: Optional[str] = None) -> Node: - """! Perform Hard Sigmoid operation element-wise on data from input node. - - Hard Sigmoid uses the following logic: - - ~~~~~~~~~~~~~{.py} - y = max(0, min(1, alpha * data + beta)) - ~~~~~~~~~~~~~ + """Perform Hard Sigmoid operation element-wise on data from input node. @param data: The node with data tensor. @param alpha: A node producing the alpha parameter. @param beta: A node producing the beta parameter @param name: Optional output node name. @return The new node performing a Hard Sigmoid element-wise on input tensor. + + Hard Sigmoid uses the following logic: + + @code{.py} + y = max(0, min(1, alpha * data + beta)) + @endcode """ return _get_node_factory_opset1().create("HardSigmoid", [data, as_node(alpha), as_node(beta)]) @@ -1184,12 +1182,13 @@ def hard_sigmoid(data: Node, alpha: NodeInput, beta: NodeInput, name: Optional[s def interpolate( image: Node, output_shape: NodeInput, attrs: dict, name: Optional[str] = None ) -> Node: - """! Perform interpolation of independent slices in input tensor. + """Perform interpolation of independent slices in input tensor. @param image: The node providing input tensor with data for interpolation. @param output_shape: 1D tensor describing output shape for spatial axes. @param attrs: The dictionary containing key, value pairs for attributes. @param name: Optional name for the output node. + @return Node representing interpolation operation. Available attributes are: @@ -1224,7 +1223,7 @@ def interpolate( Required: no Example of attribute dictionary: - ~~~~~~~~~~~~~ + @code{.py} # just required ones attrs = { 'axes': [2, 3], @@ -1237,10 +1236,8 @@ def interpolate( 'antialias': True, 'pads_begin': [2, 2, 2], } - ~~~~~~~~~~~~~ + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. - - @return Node representing interpolation operation. """ requirements = [ ("axes", True, np.integer, is_non_negative_value), @@ -1263,7 +1260,7 @@ def less( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if left input node is less than the right node element-wise. + """Return node which checks if left input node is less than the right node element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1284,7 +1281,7 @@ def less_equal( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if left input node is less or equal the right node element-wise. 
+ """Return node which checks if left input node is less or equal the right node element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1301,7 +1298,7 @@ def less_equal( @unary_op def log(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies natural logarithm to the input node element-wise. + """Return node which applies natural logarithm to the input node element-wise. @param node: The input node providing data for operation. @param name: The optional new name for output node. @@ -1317,7 +1314,7 @@ def logical_and( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which perform logical and operation on input nodes element-wise. + """Return node which perform logical and operation on input nodes element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1333,7 +1330,7 @@ def logical_and( @unary_op def logical_not(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies element-wise logical negation to the input node. + """Return node which applies element-wise logical negation to the input node. @param node: The input node providing data. @param name: The optional new name for output node. @@ -1349,7 +1346,7 @@ def logical_or( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which performs logical OR operation on input nodes element-wise. + """Return node which performs logical OR operation on input nodes element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1370,7 +1367,7 @@ def logical_xor( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which performs logical XOR operation on input nodes element-wise. + """Return node which performs logical XOR operation on input nodes element-wise. @param left_node: The first input node providing data. @param right_node: The second input node providing data. @@ -1394,7 +1391,7 @@ def lrn( size: int = 5, name: Optional[str] = None, ) -> Node: - """! Return a node which performs element-wise Local Response Normalization (LRN) operation. + """Return a node which performs element-wise Local Response Normalization (LRN) operation. @param data: Input data. @param alpha: A scale factor (usually positive). @@ -1423,7 +1420,7 @@ def lstm_cell( clip: float = 0.0, name: Optional[str] = None, ) -> Node: - """! Return a node which performs LSTMCell operation. + """Return a node which performs LSTMCell operation. @param X: The input tensor with shape: [batch_size, input_size]. @param initial_hidden_state: The hidden state tensor with shape: [batch_size, hidden_size]. @@ -1489,7 +1486,7 @@ def lstm_sequence( clip: float = 0.0, name: Optional[str] = None, ) -> Node: - """! Return a node which performs LSTMSequence operation. + """Return a node which performs LSTMSequence operation. @param X: The input tensor. Shape: [batch_size, seq_length, input_size]. @param initial_hidden_state: The hidden state tensor. @@ -1559,7 +1556,7 @@ def matmul( transpose_b: bool, name: Optional[str] = None, ) -> Node: - """! Return the Matrix Multiplication operation. + """Return the Matrix Multiplication operation. 
@param data_a: left-hand side matrix @param data_b: right-hand side matrix @@ -1584,7 +1581,7 @@ def max_pool( auto_pad: Optional[str] = None, name: Optional[str] = None, ) -> Node: - """! Perform max pooling operation with given parameters on provided data. + """Perform max pooling operation with given parameters on provided data. @param data: The node providing input data. @param strides: The distance (in pixels) to slide the filter on the feature map @@ -1623,7 +1620,7 @@ def maximum( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies the maximum operation to input nodes elementwise.""" + """Return node which applies the maximum operation to input nodes elementwise.""" return _get_node_factory_opset1().create( "Maximum", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} ) @@ -1636,7 +1633,7 @@ def minimum( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies the minimum operation to input nodes elementwise.""" + """Return node which applies the minimum operation to input nodes elementwise.""" return _get_node_factory_opset1().create( "Minimum", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} ) @@ -1649,7 +1646,7 @@ def mod( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node performing element-wise division reminder with two given tensors. + """Return node performing element-wise division reminder with two given tensors. @param left_node: The first input node for mod operation. @param right_node: The second input node for mod operation. @@ -1669,7 +1666,7 @@ def multiply( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies f(x) = A*B to the input nodes elementwise.""" + """Return node which applies f(x) = A*B to the input nodes elementwise.""" return _get_node_factory_opset1().create( "Multiply", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} ) @@ -1677,7 +1674,7 @@ def multiply( @unary_op def negative(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies f(x) = -x to the input node elementwise.""" + """Return node which applies f(x) = -x to the input node elementwise.""" return _get_node_factory_opset1().create("Negative", [node]) @@ -1692,7 +1689,7 @@ def non_max_suppression( sort_result_descending: bool = True, name: Optional[str] = None, ) -> Node: - """! Return a node which performs NonMaxSuppression. + """Return a node which performs NonMaxSuppression. @param boxes: Tensor with box coordinates. @param scores: Tensor with box scores. @@ -1725,7 +1722,7 @@ def non_max_suppression( def normalize_l2( data: NodeInput, axes: NodeInput, eps: float, eps_mode: str, name: Optional[str] = None ) -> Node: - """! Construct an NormalizeL2 operation. + """Construct an NormalizeL2 operation. @param data: Node producing the input tensor @param axes: Node indicating axes along which L2 reduction is calculated @@ -1745,7 +1742,7 @@ def not_equal( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which checks if input nodes are unequal element-wise. + """Return node which checks if input nodes are unequal element-wise. @param left_node: The first input node for not-equal operation. @param right_node: The second input node for not-equal operation. @@ -1768,7 +1765,7 @@ def one_hot( axis: int, name: Optional[str] = None, ) -> Node: - """! 
Create node performing one-hot encoding on input data. + """Create node performing one-hot encoding on input data. @param indices: Input tensor of rank N with indices of any supported integer data type. @param depth: Scalar of any supported integer type that specifies number of classes and @@ -1795,7 +1792,7 @@ def pad( arg_pad_value: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return a generic padding operation. + """Return a generic padding operation. @param arg: The node producing input tensor to be padded. @param pads_begin: number of padding elements to be added before position 0 @@ -1817,7 +1814,7 @@ def pad( def parameter( shape: TensorShape, dtype: NumericType = np.float32, name: Optional[str] = None ) -> Parameter: - """! Return an ngraph Parameter object.""" + """Return an ngraph Parameter object.""" element_type = get_element_type(dtype) return Parameter(element_type, PartialShape(shape)) @@ -1829,7 +1826,7 @@ def power( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which perform element-wise exponentiation operation. + """Return node which perform element-wise exponentiation operation. @param left_node: The node providing the base of operation. @param right_node: The node providing the exponent of operation. @@ -1845,21 +1842,21 @@ def power( @nameable_op def prelu(data: NodeInput, slope: NodeInput, name: Optional[str] = None) -> Node: - """! Perform Parametrized Relu operation element-wise on data from input node. + """Perform Parametrized Relu operation element-wise on data from input node. + + @param data: The node with data tensor. + @param slope: The node with the multipliers for negative values. + @param name: Optional output node name. + @return The new node performing a PRelu operation on tensor's channels. PRelu uses the following logic: - ~~~~~~~~~~~~~{.py} + @code{.py} if data < 0: data = data * slope elif data >= 0: data = data - ~~~~~~~~~~~~~ - - @param data: The node with data tensor. - @param slope: The node with the multipliers for negative values. - @param name: Optional output node name. - @return The new node performing a PRelu operation on tensor's channels. + @endcode """ return _get_node_factory_opset1().create("PRelu", as_nodes(data, slope)) @@ -1868,7 +1865,7 @@ def prelu(data: NodeInput, slope: NodeInput, name: Optional[str] = None) -> Node def prior_box_clustered( output_size: Node, image_size: NodeInput, attrs: dict, name: Optional[str] = None ) -> Node: - """! Generate prior boxes of specified sizes normalized to the input image size. + """Generate prior boxes of specified sizes normalized to the input image size. @param output_size: 1D tensor with two integer elements [height, width]. Specifies the spatial size of generated grid with boxes. @@ -1876,6 +1873,7 @@ def prior_box_clustered( specifies shape of the image for which boxes are generated. @param attrs: The dictionary containing key, value pairs for attributes. @param name: Optional name for the output node. + @return Node representing PriorBoxClustered operation. Available attributes are: @@ -1916,7 +1914,7 @@ def prior_box_clustered( Required: no Example of attribute dictionary: - ~~~~~~~~~~~~~{.py} + @code{.py} # just required ones attrs = { 'offset': 85, @@ -1927,11 +1925,9 @@ def prior_box_clustered( 'clip': False, 'step_widths': [1.5, 2.0, 2.5] } - ~~~~~~~~~~~~~ + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. 
- - @return Node representing PriorBoxClustered operation. """ requirements = [ ("widths", False, np.floating, is_positive_value), @@ -1954,12 +1950,13 @@ def prior_box_clustered( def prior_box( layer_shape: Node, image_shape: NodeInput, attrs: dict, name: Optional[str] = None ) -> Node: - """! Generate prior boxes of specified sizes and aspect ratios across all dimensions. + """Generate prior boxes of specified sizes and aspect ratios across all dimensions. @param layer_shape: Shape of layer for which prior boxes are computed. @param image_shape: Shape of image to which prior boxes are scaled. @param attrs: The dictionary containing key, value pairs for attributes. @param name: Optional name for the output node. + @return Node representing prior box operation. Available attributes are: @@ -2027,7 +2024,7 @@ def prior_box( Required: no Example of attribute dictionary: - ~~~~~~~~~~~~~{.py} + @code{.py} # just required ones attrs = { 'offset': 85, @@ -2039,11 +2036,9 @@ def prior_box( 'clip': True, 'fixed_size': [32, 64, 128] } - ~~~~~~~~~~~~~ + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. - - @return Node representing prior box operation. """ requirements = [ ("offset", True, np.floating, is_non_negative_value), @@ -2073,13 +2068,14 @@ def proposal( attrs: dict, name: Optional[str] = None, ) -> Node: - """! Filter bounding boxes and outputs only those with the highest prediction confidence. + """Filter bounding boxes and outputs only those with the highest prediction confidence. @param class_probs: 4D input floating point tensor with class prediction scores. @param bbox_deltas: 4D input floating point tensor with box logits. @param image_shape: The 1D input tensor with 3 or 4 elements describing image shape. @param attrs: The dictionary containing key, value pairs for attributes. @param name: Optional name for the output node. + @return Node representing Proposal operation. * base_size The size of the anchor to which scale and ratio attributes are applied. Range of values: a positive unsigned integer number @@ -2159,23 +2155,21 @@ def proposal( Example of attribute dictionary: - ~~~~~~~~~~~~~{.py} - # just required ones - attrs = { - 'base_size': 85, - 'pre_nms_topn': 10, - 'post_nms_topn': 20, - 'nms_thresh': 0.34, - 'feat_stride': 16, - 'min_size': 32, - 'ratio': [0.1, 1.5, 2.0, 2.5], - 'scale': [2, 3, 3, 4], - } - ~~~~~~~~~~~~~ + @code{.py} + # just required ones + attrs = { + 'base_size': 85, + 'pre_nms_topn': 10, + 'post_nms_topn': 20, + 'nms_thresh': 0.34, + 'feat_stride': 16, + 'min_size': 32, + 'ratio': [0.1, 1.5, 2.0, 2.5], + 'scale': [2, 3, 3, 4], + } + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. - - @return Node representing Proposal operation. """ requirements = [ ("base_size", True, np.unsignedinteger, is_positive_value), @@ -2213,7 +2207,7 @@ def psroi_pooling( mode: str, name: Optional[str] = None, ) -> Node: - """! Return a node which produces a PSROIPooling operation. + """Return a node which produces a PSROIPooling operation. @param input: Input feature map {N, C, ...} @param coords: Coordinates of bounding boxes @@ -2242,7 +2236,7 @@ def psroi_pooling( @nameable_op def range(start: Node, stop: NodeInput, step: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which produces the Range operation. + """Return a node which produces the Range operation. 
@param start: The start value of the generated range @param stop: The stop value of the generated range @@ -2255,7 +2249,7 @@ def range(start: Node, stop: NodeInput, step: NodeInput, name: Optional[str] = N @unary_op def relu(node: NodeInput, name: Optional[str] = None) -> Node: - """! Perform rectified linear unit operation on input node element-wise. + """Perform rectified linear unit operation on input node element-wise. @param node: One of: input node, array or scalar. @param name: The optional output node name. @@ -2268,7 +2262,7 @@ def relu(node: NodeInput, name: Optional[str] = None) -> Node: def reduce_logical_and( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Logical AND reduction operation on input tensor, eliminating the specified reduction axes. + """Logical AND reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to reduce. @param reduction_axes: The axes to eliminate through AND operation. @@ -2285,7 +2279,7 @@ def reduce_logical_and( def reduce_logical_or( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Logical OR reduction operation on input tensor, eliminating the specified reduction axes. + """Logical OR reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to reduce. @param reduction_axes: The axes to eliminate through OR operation. @@ -2302,7 +2296,7 @@ def reduce_logical_or( def reduce_max( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Max-reduction operation on input tensor, eliminating the specified reduction axes. + """Max-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to max-reduce. @param reduction_axes: The axes to eliminate through max operation. @@ -2318,7 +2312,7 @@ def reduce_max( def reduce_mean( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Mean-reduction operation on input tensor, eliminating the specified reduction axes. + """Mean-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to mean-reduce. @param reduction_axes: The axes to eliminate through mean operation. @@ -2335,7 +2329,7 @@ def reduce_mean( def reduce_min( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Min-reduction operation on input tensor, eliminating the specified reduction axes. + """Min-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to min-reduce. @param reduction_axes: The axes to eliminate through min operation. @@ -2351,7 +2345,7 @@ def reduce_min( def reduce_prod( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! Product-reduction operation on input tensor, eliminating the specified reduction axes. + """Product-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to product-reduce. @param reduction_axes: The axes to eliminate through product operation. @@ -2368,7 +2362,7 @@ def reduce_prod( def reduce_sum( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! 
Perform element-wise sums of the input tensor, eliminating the specified reduction axes. + """Perform element-wise sums of the input tensor, eliminating the specified reduction axes. @param node: The node providing data for operation. @param reduction_axes: The axes to eliminate through summation. @@ -2394,7 +2388,7 @@ def region_yolo( anchors: List[float] = None, name: Optional[str] = None, ) -> Node: - """! Return a node which produces the RegionYolo operation. + """Return a node which produces the RegionYolo operation. @param input: Input data @param coords: Number of coordinates for each region @@ -2431,7 +2425,7 @@ def region_yolo( def reshape( node: NodeInput, output_shape: NodeInput, special_zero: bool, name: Optional[str] = None ) -> Node: - """! Return reshaped node according to provided parameters. + """Return reshaped node according to provided parameters. @param node: The tensor we want to reshape. @param output_shape: The node with a new shape for input tensor. @@ -2450,7 +2444,7 @@ def reshape( @unary_op def result(data: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which represents an output of a graph (Function). + """Return a node which represents an output of a graph (Function). @param data: The tensor containing the input data @return Result node @@ -2466,7 +2460,7 @@ def reverse_sequence( seq_axis: NumericData, name: Optional[str] = None, ) -> Node: - """! Return a node which produces a ReverseSequence operation. + """Return a node which produces a ReverseSequence operation. @param input: tensor with input data to reverse @param seq_lengths: 1D tensor of integers with sequence lengths in the input tensor. @@ -2489,7 +2483,7 @@ def select( auto_broadcast: str = "numpy", name: Optional[str] = None, ) -> Node: - """! Perform an element-wise selection operation on input tensors. + """Perform an element-wise selection operation on input tensors. @param cond: Tensor with selection mask of type `boolean`. @param then_node: Tensor providing data to be selected if respective `cond` @@ -2512,7 +2506,7 @@ def select( def selu( data: NodeInput, alpha: NodeInput, lambda_value: NodeInput, name: Optional[str] = None ) -> Node: - """! Perform a Scaled Exponential Linear Unit (SELU) operation on input node element-wise. + """Perform a Scaled Exponential Linear Unit (SELU) operation on input node element-wise. @param data: input node, array or scalar. @param alpha: Alpha coefficient of SELU operation @@ -2525,7 +2519,7 @@ def selu( @nameable_op def shape_of(data: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which produces a tensor containing the shape of its input data. + """Return a node which produces a tensor containing the shape of its input data. @param data: The tensor containing the input data. @return ShapeOf node @@ -2535,7 +2529,7 @@ def shape_of(data: NodeInput, name: Optional[str] = None) -> Node: @unary_op def sigmoid(data: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which applies the sigmoid function element-wise. + """Return a node which applies the sigmoid function element-wise. @param data: The tensor containing the input data @return Sigmoid node @@ -2545,7 +2539,7 @@ def sigmoid(data: NodeInput, name: Optional[str] = None) -> Node: @unary_op def sign(node: NodeInput, name: Optional[str] = None) -> Node: - """! Perform element-wise sign operation. + """Perform element-wise sign operation. @param node: One of: input node, array or scalar. @param name: The optional new name for output node. 
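To make the shape-manipulation helpers above concrete, here is a small sketch combining reshape and shape_of; the input shape is an assumption for the example.

@code{.py}
import numpy as np
import ngraph as ng

data = ng.parameter([2, 3, 4], dtype=np.float32)
# with special_zero=True a 0 keeps the corresponding input dimension and -1 is inferred
flat = ng.reshape(data, np.array([0, -1], dtype=np.int64), special_zero=True)   # -> [2, 12]
shape = ng.shape_of(flat)
@endcode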
@@ -2557,7 +2551,7 @@ def sign(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def sin(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply sine function on the input node element-wise. + """Apply sine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -2568,7 +2562,7 @@ def sin(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def sinh(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply hyperbolic sine function on the input node element-wise. + """Apply hyperbolic sine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -2579,7 +2573,7 @@ def sinh(node: NodeInput, name: Optional[str] = None) -> Node: @nameable_op def softmax(data: NodeInput, axis: int, name: Optional[str] = None) -> Node: - """! Apply softmax operation on each element of input tensor. + """Apply softmax operation on each element of input tensor. @param data: The tensor providing input data. @param axis: An axis along which Softmax should be calculated @@ -2590,7 +2584,7 @@ def softmax(data: NodeInput, axis: int, name: Optional[str] = None) -> Node: @nameable_op def space_to_depth(data: Node, mode: str, block_size: int = 1, name: str = None) -> Node: - """! Perform SpaceToDepth operation on the input tensor. + """Perform SpaceToDepth operation on the input tensor. SpaceToDepth rearranges blocks of spatial data into depth. The operator returns a copy of the input tensor where values from the height @@ -2613,7 +2607,7 @@ def space_to_depth(data: Node, mode: str, block_size: int = 1, name: str = None) @nameable_op def split(data: NodeInput, axis: NodeInput, num_splits: int, name: Optional[str] = None) -> Node: - """! Return a node which splits the input tensor into same-length slices. + """Return a node which splits the input tensor into same-length slices. @param data: The input tensor to be split @param axis: Axis along which the input data will be split @@ -2629,7 +2623,7 @@ def split(data: NodeInput, axis: NodeInput, num_splits: int, name: Optional[str] @unary_op def sqrt(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies square root to the input node element-wise. + """Return node which applies square root to the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -2642,7 +2636,7 @@ def sqrt(node: NodeInput, name: Optional[str] = None) -> Node: def squared_difference( x1: NodeInput, x2: NodeInput, auto_broadcast: str = "NUMPY", name: Optional[str] = None ) -> Node: - r"""! Perform an element-wise squared difference between two tensors. + r"""Perform an element-wise squared difference between two tensors. \f[ y[i] = (x_1[i] - x_2[i])^2 \f] @@ -2660,7 +2654,13 @@ def squared_difference( @nameable_op def squeeze(data: NodeInput, axes: NodeInput, name: Optional[str] = None) -> Node: - """! Perform squeeze operation on input tensor. + """Perform squeeze operation on input tensor. + + @param data: The node with data tensor. + @param axes: List of non-negative integers, indicate the dimensions to squeeze. + One of: input node or array. + @param name: Optional new name for output node. + @return The new node performing a squeeze operation on input tensor. Remove single-dimensional entries from the shape of a tensor. Takes a parameter `axes` with a list of axes to squeeze. 
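A brief usage sketch for the softmax and squared_difference helpers above; the [1, 6] shape and the uniform reference tensor are assumptions for illustration.

@code{.py}
import numpy as np
import ngraph as ng

logits = ng.parameter([1, 6], dtype=np.float32)
probs = ng.softmax(logits, axis=1)                           # normalize along axis 1
uniform = np.full([1, 6], 1.0 / 6.0, dtype=np.float32)
diff = ng.squared_difference(probs, uniform)                 # (probs - uniform)^2, element-wise
@endcode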
@@ -2673,12 +2673,6 @@ def squeeze(data: NodeInput, axes: NodeInput, name: Optional[str] = None) -> Nod Inputs: tensor with shape [1, 2, 1, 3, 1, 1], axes=[2, 4] Result: tensor with shape [1, 2, 3, 1] - - @param data: The node with data tensor. - @param axes: List of non-negative integers, indicate the dimensions to squeeze. - One of: input node or array. - @param name: Optional new name for output node. - @return The new node performing a squeeze operation on input tensor. """ return _get_node_factory_opset1().create("Squeeze", as_nodes(data, axes)) @@ -2696,7 +2690,7 @@ def strided_slice( ellipsis_mask: Optional[List[int]] = None, name: Optional[str] = None, ) -> Node: - """! Return a node which dynamically repeats(replicates) the input data tensor. + """Return a node which dynamically repeats(replicates) the input data tensor. @param data: The tensor to be sliced @param begin: 1D tensor with begin indexes for input blob slicing @@ -2737,7 +2731,7 @@ def subtract( auto_broadcast: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Return node which applies f(x) = A-B to the input nodes element-wise. + """Return node which applies f(x) = A-B to the input nodes element-wise. @param left_node: The node providing data for left hand side of operator. @param right_node: The node providing data for right hand side of operator. @@ -2753,7 +2747,7 @@ def subtract( @unary_op def tan(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply tangent function on the input node element-wise. + """Apply tangent function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -2764,7 +2758,7 @@ def tan(node: NodeInput, name: Optional[str] = None) -> Node: @unary_op def tanh(node: NodeInput, name: Optional[str] = None) -> Node: - """! Return node which applies hyperbolic tangent to the input node element-wise. + """Return node which applies hyperbolic tangent to the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -2784,7 +2778,7 @@ def tensor_iterator( concat_output_desc: List[TensorIteratorConcatOutputDesc], name: Optional[str] = None, ) -> Node: - """! Perform recurrent execution of the network described in the body, iterating through the data. + """Perform recurrent execution of the network described in the body, iterating through the data. @param inputs: The provided to TensorIterator operator. @param graph_body: The graph representing the body we execute. @@ -2818,7 +2812,7 @@ def tensor_iterator( @nameable_op def tile(data: NodeInput, repeats: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which dynamically repeats(replicates) the input data tensor. + """Return a node which dynamically repeats(replicates) the input data tensor. @param data: The input tensor to be tiled @param repeats: Per-dimension replication factors @@ -2836,7 +2830,7 @@ def topk( sort: str, name: Optional[str] = None, ) -> Node: - """! Return a node which performs TopK. + """Return a node which performs TopK. @param data: Input data. @param k: K. @@ -2854,7 +2848,7 @@ def topk( @nameable_op def transpose(data: NodeInput, input_order: NodeInput, name: Optional[str] = None) -> Node: - """! Return a node which transposes the data in the input tensor. + """Return a node which transposes the data in the input tensor. 
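As a rough sketch of the TopK factory above: the shape is an assumption, and the "max"/"value" strings are the conventional mode/sort attribute values rather than something stated in this patch.

@code{.py}
import numpy as np
import ngraph as ng

scores = ng.parameter([1, 10], dtype=np.float32)
node = ng.topk(scores, k=np.int64(3), axis=1, mode="max", sort="value")
# the resulting node has two outputs: the top values and their indices
@endcode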
@param data: The input tensor to be transposed @param input_order: Permutation of axes to be applied to the input tensor @@ -2864,7 +2858,7 @@ def transpose(data: NodeInput, input_order: NodeInput, name: Optional[str] = Non def unsqueeze(data: NodeInput, axes: NodeInput, name: Optional[str] = None) -> Node: - """! Perform unsqueeze operation on input tensor. + """Perform unsqueeze operation on input tensor. Insert single-dimensional entries to the shape of a tensor. Takes one required argument axes, a list of dimensions that will be inserted. @@ -2885,7 +2879,7 @@ def unsqueeze(data: NodeInput, axes: NodeInput, name: Optional[str] = None) -> N def variadic_split( data: NodeInput, axis: NodeInput, split_lengths: NodeInput, name: Optional[str] = None ) -> Node: - """! Return a node which splits the input tensor into variadic length slices. + """Return a node which splits the input tensor into variadic length slices. @param data: The input tensor to be split @param axis: Axis along which the input data will be split diff --git a/ngraph/python/src/ngraph/opset2/ops.py b/ngraph/python/src/ngraph/opset2/ops.py index 8cac9770f01677..0f7317a5a157a1 100644 --- a/ngraph/python/src/ngraph/opset2/ops.py +++ b/ngraph/python/src/ngraph/opset2/ops.py @@ -14,7 +14,7 @@ # limitations under the License. # ****************************************************************************** -"""! Factory functions for all ngraph ops.""" +"""Factory functions for all ngraph ops.""" from typing import Callable, Iterable, List, Optional, Set, Union import numpy as np @@ -66,7 +66,7 @@ def batch_to_space( crops_end: NodeInput, name: Optional[str] = None, ) -> Node: - """! Perform BatchToSpace operation on the input tensor. + """Perform BatchToSpace operation on the input tensor. BatchToSpace permutes data from the batch dimension of the data tensor into spatial dimensions. @@ -84,14 +84,13 @@ def batch_to_space( @unary_op def gelu(node: NodeInput, name: Optional[str] = None) -> Node: - r"""! Perform Gaussian Error Linear Unit operation element-wise on data from input node. + r"""Perform Gaussian Error Linear Unit operation element-wise on data from input node. Computes GELU function: \f[ f(x) = 0.5\cdot x\cdot(1 + erf( \dfrac{x}{\sqrt{2}}) \f] - For more information refer to: - `Gaussian Error Linear Unit (GELU) `_ + For more information refer to [Gaussian Error Linear Unit (GELU)](https://arxiv.org/pdf/1606.08415.pdf>) @param node: Input tensor. One of: input node, array or scalar. @param name: Optional output node name. @@ -108,7 +107,7 @@ def mvn( eps: float = 1e-9, name: str = None, ) -> Node: - r"""! Perform Mean Variance Normalization operation on data from input node. + r"""Perform Mean Variance Normalization operation on data from input node. Computes MVN on the input tensor `data` (called `X`) using formula: @@ -131,7 +130,7 @@ def mvn( @nameable_op def reorg_yolo(input: Node, stride: List[int], name: Optional[str] = None) -> Node: - """! Return a node which produces the ReorgYolo operation. + """Return a node which produces the ReorgYolo operation. @param input: Input data @param stride: Stride to reorganize input by @@ -150,7 +149,7 @@ def roi_pooling( method: str, name: Optional[str] = None, ) -> Node: - """! Return a node which produces an ROIPooling operation. + """Return a node which produces an ROIPooling operation. 
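A one-line usage sketch for the GELU helper documented above; the input shape is an assumption.

@code{.py}
import numpy as np
import ngraph as ng

x = ng.parameter([8], dtype=np.float32)
y = ng.gelu(x)   # 0.5 * x * (1 + erf(x / sqrt(2))) applied element-wise, per the formula above
@endcode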
@param input: Input feature map {N, C, ...} @param coords: Coordinates of bounding boxes @@ -175,7 +174,7 @@ def space_to_batch( pads_end: NodeInput, name: Optional[str] = None, ) -> Node: - """! Perform SpaceToBatch operation on the input tensor. + """Perform SpaceToBatch operation on the input tensor. SpaceToBatch permutes data tensor blocks of spatial data into batch dimension. The operator returns a copy of the input tensor where values from spatial blocks dimensions diff --git a/ngraph/python/src/ngraph/opset3/ops.py b/ngraph/python/src/ngraph/opset3/ops.py index 8eff85166b9a92..f6bb1bd147c13c 100644 --- a/ngraph/python/src/ngraph/opset3/ops.py +++ b/ngraph/python/src/ngraph/opset3/ops.py @@ -14,7 +14,7 @@ # limitations under the License. # ****************************************************************************** -"""! Factory functions for all ngraph ops.""" +"""Factory functions for all ngraph ops.""" from typing import Callable, Iterable, List, Optional, Set, Union import numpy as np @@ -60,7 +60,7 @@ @nameable_op def assign(new_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node: - """! Return a node which produces the Assign operation. + """Return a node which produces the Assign operation. @param new_value: Node producing a value to be assigned to a variable. @param variable_id: Id of a variable to be updated. @@ -82,7 +82,7 @@ def broadcast( broadcast_spec: str = "NUMPY", name: Optional[str] = None, ) -> Node: - """! Create a node which broadcasts the input node's values along specified axes to a desired shape. + """Create a node which broadcasts the input node's values along specified axes to a desired shape. @param data: The node with input tensor data. @param target_shape: The node with a new shape we want to broadcast tensor to. @@ -109,7 +109,7 @@ def bucketize( with_right_bound: bool = True, name: Optional[str] = None, ) -> Node: - """! Return a node which produces the Bucketize operation. + """Return a node which produces the Bucketize operation. @param data: Input data to bucketize @param buckets: 1-D of sorted unique boundaries for buckets @@ -134,7 +134,7 @@ def cum_sum( reverse: bool = False, name: Optional[str] = None, ) -> Node: - """! Construct a cumulative summation operation. + """Construct a cumulative summation operation. @param arg: The tensor to be summed. @param axis: zero dimension tensor specifying axis position along which sum will be performed. @@ -156,7 +156,7 @@ def embedding_bag_offsets_sum( per_sample_weights: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return a node which performs sums of bags of embeddings without the intermediate embeddings. + """Return a node which performs sums of bags of embeddings without the intermediate embeddings. @param emb_table: Tensor containing the embedding lookup table. @param indices: Tensor with indices. @@ -183,7 +183,7 @@ def embedding_bag_packed_sum( per_sample_weights: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return an EmbeddingBagPackedSum node. + """Return an EmbeddingBagPackedSum node. EmbeddingSegmentsSum constructs an output tensor by replacing every index in a given input tensor with a row (from the weights matrix) at that index @@ -211,7 +211,7 @@ def embedding_segments_sum( per_sample_weights: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return an EmbeddingSegmentsSum node. + """Return an EmbeddingSegmentsSum node. 
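For orientation, a small sketch of the opset3 broadcast and cum_sum factories above; shapes and axis values are illustrative assumptions.

@code{.py}
import numpy as np
import ngraph as ng

row = ng.parameter([1, 3], dtype=np.float32)
tiled = ng.broadcast(row, np.array([4, 3], dtype=np.int64))   # NUMPY broadcast rules by default
running = ng.cum_sum(ng.parameter([5], dtype=np.float32), axis=np.int64(0))
@endcode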
EmbeddingSegmentsSum constructs an output tensor by replacing every index in a given input tensor with a row (from the weights matrix) at that index @@ -248,7 +248,7 @@ def extract_image_patches( auto_pad: str, name: Optional[str] = None, ) -> Node: - """! Return a node which produces the ExtractImagePatches operation. + """Return a node which produces the ExtractImagePatches operation. @param image: 4-D Input data to extract image patches. @param sizes: Patch size in the format of [size_rows, size_cols]. @@ -280,7 +280,7 @@ def gru_cell( linear_before_reset: bool = False, name: Optional[str] = None, ) -> Node: - """! Perform GRUCell operation on the tensor from input node. + """Perform GRUCell operation on the tensor from input node. GRUCell represents a single GRU Cell that computes the output using the formula described in the paper: https://arxiv.org/abs/1406.1078 @@ -342,7 +342,7 @@ def non_max_suppression( output_type: str = "i64", name: Optional[str] = None, ) -> Node: - """! Return a node which performs NonMaxSuppression. + """Return a node which performs NonMaxSuppression. @param boxes: Tensor with box coordinates. @param scores: Tensor with box scores. @@ -375,7 +375,7 @@ def non_max_suppression( @nameable_op def non_zero(data: NodeInput, output_type: str = "i64", name: Optional[str] = None,) -> Node: - """! Return the indices of the elements that are non-zero. + """Return the indices of the elements that are non-zero. @param data: Input data. @param output_type: Output tensor type. @@ -391,7 +391,7 @@ def non_zero(data: NodeInput, output_type: str = "i64", name: Optional[str] = No @nameable_op def read_value(init_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node: - """! Return a node which produces the Assign operation. + """Return a node which produces the Assign operation. @param init_value: Node producing a value to be returned instead of an unassigned variable. @param variable_id: Id of a variable to be read. @@ -419,7 +419,7 @@ def rnn_cell( clip: float = 0.0, name: Optional[str] = None, ) -> Node: - """! Perform RNNCell operation on tensor from input node. + """Perform RNNCell operation on tensor from input node. It follows notation and equations defined as in ONNX standard: https://github.com/onnx/onnx/blob/master/docs/Operators.md#RNN @@ -475,7 +475,7 @@ def roi_align( mode: str, name: Optional[str] = None, ) -> Node: - """! Return a node which performs ROIAlign. + """Return a node which performs ROIAlign. @param data: Input data. @param rois: RoIs (Regions of Interest) to pool over. @@ -509,23 +509,23 @@ def scatter_elements_update( axis: NodeInput, name: Optional[str] = None, ) -> Node: - """! Return a node which produces a ScatterElementsUpdate operation. + """Return a node which produces a ScatterElementsUpdate operation. + + @param data: The input tensor to be updated. + @param indices: The tensor with indexes which will be updated. + @param updates: The tensor with update values. + @param axis: The axis for scatter. + @return ScatterElementsUpdate node ScatterElementsUpdate creates a copy of the first input tensor with updated elements specified with second and third input tensors. 
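A minimal sketch of how read_value and assign pair up to model a stateful variable; the variable_id string and the zero initializer (via `ng.constant`) are placeholders for the example.

@code{.py}
import numpy as np
import ngraph as ng

init = ng.constant(np.zeros([1, 3], dtype=np.float32))
state = ng.read_value(init, variable_id="var_0")    # returns init until the variable is written
update = ng.assign(state, variable_id="var_0")      # writes the current value back to "var_0"
@endcode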
- For each entry in `updates`, the target index in `data` is obtained by combining the corresponding entry in `indices` with the index of the entry itself: the index-value for dimension equal to `axis` is obtained from the value of the corresponding entry in `indices` and the index-value for dimension not equal to `axis` is obtained from the index of the entry itself. - @param data: The input tensor to be updated. - @param indices: The tensor with indexes which will be updated. - @param updates: The tensor with update values. - @param axis: The axis for scatter. - @return ScatterElementsUpdate node """ return _get_node_factory_opset3().create( "ScatterElementsUpdate", as_nodes(data, indices, updates, axis) @@ -536,7 +536,7 @@ def scatter_elements_update( def scatter_update( data: Node, indices: NodeInput, updates: NodeInput, axis: NodeInput, name: Optional[str] = None ) -> Node: - """! Return a node which produces a ScatterUpdate operation. + """Return a node which produces a ScatterUpdate operation. ScatterUpdate sets new values to slices from data addressed by indices. @@ -554,7 +554,7 @@ def scatter_update( @nameable_op def shape_of(data: NodeInput, output_type: str = "i64", name: Optional[str] = None) -> Node: - """! Return a node which produces a tensor containing the shape of its input data. + """Return a node which produces a tensor containing the shape of its input data. @param data: The tensor containing the input data. @param output_type: Output element type. @@ -569,7 +569,17 @@ def shape_of(data: NodeInput, output_type: str = "i64", name: Optional[str] = No @nameable_op def shuffle_channels(data: Node, axis: int, groups: int, name: Optional[str] = None) -> Node: - """! Perform permutation on data in the channel dimension of the input tensor. + """Perform permutation on data in the channel dimension of the input tensor. + + @param data: The node with input tensor. + @param axis: Channel dimension index in the data tensor. + A negative value means that the index should be calculated + from the back of the input data shape. + @param group: The channel dimension specified by the axis parameter + should be split into this number of groups. + @param name: Optional output node name. + @return The new node performing a permutation on data in the channel dimension + of the input tensor. The operation is the equivalent with the following transformation of the input tensor `data` of shape [N, C, H, W]: @@ -582,7 +592,7 @@ def shuffle_channels(data: Node, axis: int, groups: int, name: Optional[str] = N For example: - ~~~~~~~~~~~~~{.py} + @code{.py} Inputs: tensor of shape [1, 6, 2, 2] data = [[[[ 0., 1.], [ 2., 3.]], @@ -603,17 +613,7 @@ def shuffle_channels(data: Node, axis: int, groups: int, name: Optional[str] = N [[ 4., 5.], [ 6., 7.]], [[12., 13.], [14., 15.]], [[20., 21.], [22., 23.]]]] - ~~~~~~~~~~~~~ - - @param data: The node with input tensor. - @param axis: Channel dimension index in the data tensor. - A negative value means that the index should be calculated - from the back of the input data shape. - @param group: The channel dimension specified by the axis parameter - should be split into this number of groups. - @param name: Optional output node name. - @return The new node performing a permutation on data in the channel dimension - of the input tensor. 
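The following sketch mirrors the shuffle_channels case above for a [1, 6, 2, 2] input split into 3 groups along the channel axis.

@code{.py}
import numpy as np
import ngraph as ng

data = ng.parameter([1, 6, 2, 2], dtype=np.float32)
node = ng.shuffle_channels(data, axis=1, groups=3)   # same case as the worked example above
@endcode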
+ @endcode """ return _get_node_factory_opset3().create( "ShuffleChannels", [as_node(data)], {"axis": axis, "groups": groups} @@ -630,7 +630,7 @@ def topk( index_element_type: str = "i32", name: Optional[str] = None, ) -> Node: - """! Return a node which performs TopK. + """Return a node which performs TopK. @param data: Input data. @param k: K. diff --git a/ngraph/python/src/ngraph/opset4/ops.py b/ngraph/python/src/ngraph/opset4/ops.py index 005158eaa85900..7bfd213e0d3a7a 100644 --- a/ngraph/python/src/ngraph/opset4/ops.py +++ b/ngraph/python/src/ngraph/opset4/ops.py @@ -14,7 +14,7 @@ # limitations under the License. # ****************************************************************************** -"""! Factory functions for all ngraph ops.""" +"""Factory functions for all ngraph ops.""" from typing import Callable, Iterable, List, Optional, Set, Union import numpy as np @@ -70,7 +70,7 @@ def ctc_loss( unique: bool = False, name: Optional[str] = None, ) -> Node: - """! Return a node which performs CTCLoss. + """Return a node which performs CTCLoss. @param logits: 3-D tensor of logits. @param logit_length: 1-D tensor of lengths for each object from a batch. @@ -108,7 +108,7 @@ def non_max_suppression( output_type: str = "i64", name: Optional[str] = None, ) -> Node: - """! Return a node which performs NonMaxSuppression. + """Return a node which performs NonMaxSuppression. @param boxes: Tensor with box coordinates. @param scores: Tensor with box scores. @@ -141,7 +141,7 @@ def non_max_suppression( @nameable_op def softplus(data: NodeInput, name: Optional[str] = None) -> Node: - """! Apply SoftPlus operation on each element of input tensor. + """Apply SoftPlus operation on each element of input tensor. @param data: The tensor providing input data. @return The new node with SoftPlus operation applied on each element. @@ -151,7 +151,7 @@ def softplus(data: NodeInput, name: Optional[str] = None) -> Node: @nameable_op def mish(data: NodeInput, name: Optional[str] = None,) -> Node: - """! Return a node which performs Mish. + """Return a node which performs Mish. @param data: Tensor with input data floating point type. @return The new node which performs Mish @@ -161,7 +161,7 @@ def mish(data: NodeInput, name: Optional[str] = None,) -> Node: @nameable_op def hswish(data: NodeInput, name: Optional[str] = None,) -> Node: - """! Return a node which performs HSwish (hard version of Swish). + """Return a node which performs HSwish (hard version of Swish). @param data: Tensor with input data floating point type. @return The new node which performs HSwish @@ -175,7 +175,7 @@ def swish( beta: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """! Return a node which performing Swish activation function Swish(x, beta=1.0) = x * sigmoid(x * beta)). + """Return a node which performing Swish activation function Swish(x, beta=1.0) = x * sigmoid(x * beta)). @param data: Tensor with input data floating point type. @return The new node which performs Swish @@ -187,7 +187,7 @@ def swish( @nameable_op def acosh(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply hyperbolic inverse cosine function on the input node element-wise. + """Apply hyperbolic inverse cosine function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -198,7 +198,7 @@ def acosh(node: NodeInput, name: Optional[str] = None) -> Node: @nameable_op def asinh(node: NodeInput, name: Optional[str] = None) -> Node: - """! 
Apply hyperbolic inverse sinus function on the input node element-wise. + """Apply hyperbolic inverse sinus function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -209,7 +209,7 @@ def asinh(node: NodeInput, name: Optional[str] = None) -> Node: @nameable_op def atanh(node: NodeInput, name: Optional[str] = None) -> Node: - """! Apply hyperbolic inverse tangent function on the input node element-wise. + """Apply hyperbolic inverse tangent function on the input node element-wise. @param node: One of: input node, array or scalar. @param name: Optional new name for output node. @@ -226,7 +226,7 @@ def proposal( attrs: dict, name: Optional[str] = None, ) -> Node: - """! Filter bounding boxes and outputs only those with the highest prediction confidence. + """Filter bounding boxes and outputs only those with the highest prediction confidence. @param class_probs: 4D input floating point tensor with class prediction scores. @param bbox_deltas: 4D input floating point tensor with corrected predictions of bounding boxes @@ -295,8 +295,9 @@ def proposal( Object Detection API models Default value: "" (empty string) Required: no + Example of attribute dictionary: - ~~~~~~~~~~~~~~~~~~~~~~~~{.py} + @code{.py} # just required ones attrs = { 'base_size': 85, @@ -308,7 +309,7 @@ def proposal( 'ratio': [0.1, 1.5, 2.0, 2.5], 'scale': [2, 3, 3, 4], } - ~~~~~~~~~~~~~~~~~~~~~~~~ + @endcode Optional attributes which are absent from dictionary will be set with corresponding default. @return Node representing Proposal operation. """ @@ -340,7 +341,7 @@ def proposal( def reduce_l1( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! L1-reduction operation on input tensor, eliminating the specified reduction axes. + """L1-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to mean-reduce. @param reduction_axes: The axes to eliminate through mean operation. @@ -357,7 +358,7 @@ def reduce_l1( def reduce_l2( node: NodeInput, reduction_axes: NodeInput, keep_dims: bool = False, name: Optional[str] = None ) -> Node: - """! L2-reduction operation on input tensor, eliminating the specified reduction axes. + """L2-reduction operation on input tensor, eliminating the specified reduction axes. @param node: The tensor we want to mean-reduce. @param reduction_axes: The axes to eliminate through mean operation. @@ -385,7 +386,7 @@ def lstm_cell( clip: float = 0.0, name: Optional[str] = None, ) -> Node: - """! Return a node which performs LSTMCell operation. + """Return a node which performs LSTMCell operation. @param X: The input tensor with shape: [batch_size, input_size]. @param initial_hidden_state: The hidden state tensor with shape: [batch_size, hidden_size]. 
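To illustrate the two new opset4 reductions documented above, a short sketch; the input shape and axes are assumptions.

@code{.py}
import numpy as np
import ngraph as ng

data = ng.parameter([2, 4], dtype=np.float32)
axes = np.array([1], dtype=np.int64)
l1 = ng.reduce_l1(data, reduction_axes=axes, keep_dims=True)   # sum of absolute values, shape [2, 1]
l2 = ng.reduce_l2(data, reduction_axes=axes)                   # sqrt of sum of squares, shape [2]
@endcode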
diff --git a/ngraph/python/src/ngraph/opset6/__init__.py b/ngraph/python/src/ngraph/opset6/__init__.py index b81f6d5321d096..7451fb403a0243 100644 --- a/ngraph/python/src/ngraph/opset6/__init__.py +++ b/ngraph/python/src/ngraph/opset6/__init__.py @@ -43,6 +43,7 @@ from ngraph.opset1.ops import cos from ngraph.opset1.ops import cosh from ngraph.opset1.ops import ctc_greedy_decoder +from ngraph.opset6.ops import ctc_greedy_decoder_seq_len from ngraph.opset4.ops import ctc_loss from ngraph.opset3.ops import cum_sum from ngraph.opset3.ops import cum_sum as cumsum @@ -97,7 +98,7 @@ from ngraph.opset4.ops import mish from ngraph.opset1.ops import mod from ngraph.opset1.ops import multiply -from ngraph.opset2.ops import mvn +from ngraph.opset6.ops import mvn from ngraph.opset1.ops import negative from ngraph.opset5.ops import non_max_suppression from ngraph.opset3.ops import non_zero diff --git a/ngraph/python/src/ngraph/opset6/ops.py b/ngraph/python/src/ngraph/opset6/ops.py index 667765e6a0623d..fe8ca220f7cb45 100644 --- a/ngraph/python/src/ngraph/opset6/ops.py +++ b/ngraph/python/src/ngraph/opset6/ops.py @@ -58,6 +58,38 @@ # -------------------------------------------- ops ------------------------------------------------ +@nameable_op +def ctc_greedy_decoder_seq_len( + data: NodeInput, + sequence_length: NodeInput, + blank_index: Optional[NodeInput] = None, + merge_repeated: bool = True, + classes_index_type: str = "i32", + sequence_length_type: str = "i32", + name: Optional[str] = None, +) -> Node: + """Return a node which performs CTCGreedyDecoderSeqLen. + + @param data: The input 3D tensor. Shape: [batch_size, seq_length, num_classes] + @param sequence_length: Input 1D tensor with sequence length. Shape: [batch_size] + @param blank_index: Scalar or 1D tensor with specifies the class index to use for the blank class. + Optional parameter. Default value is num_classes-1. + @return: The new node which performs CTCGreedyDecoderSeqLen. + """ + if blank_index is not None: + inputs = as_nodes(data, sequence_length, blank_index) + else: + inputs = as_nodes(data, sequence_length) + + attributes = { + "merge_repeated": merge_repeated, + "classes_index_type": classes_index_type, + "sequence_length_type": sequence_length_type + } + + return _get_node_factory_opset6().create("CTCGreedyDecoderSeqLen", inputs, attributes) + + @nameable_op def gather_elements( data: NodeInput, @@ -79,3 +111,34 @@ def gather_elements( } return _get_node_factory_opset6().create("GatherElements", inputs, attributes) + + +@nameable_op +def mvn( + data: Node, + axes: Node, + normalize_variance: bool, + eps: float, + eps_mode: str, + name: Optional[str] = None, +) -> Node: + """Return a node which performs MeanVarianceNormalization (MVN). + + @param data: The node with data tensor. + @param axes: The node with axes to reduce on. + @param normalize_variance: Denotes whether to perform variance normalization. + @param eps: The number added to the variance to avoid division by zero + when normalizing the value. Scalar value. + @param eps_mode: how eps is applied (`inside_sqrt` or `outside_sqrt`) + @param name: Optional output node name. + @return The new node performing a MVN operation on input tensor. 
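For quick reference, a usage sketch of the two opset6 additions above, reusing the shapes and attribute values from the unit tests later in this patch; the parameter names are illustrative.

@code{.py}
import numpy as np
import ngraph as ng

probs = ng.parameter([8, 20, 128], dtype=np.float32)       # [batch_size, seq_length, num_classes]
seq_len = ng.parameter([8], dtype=np.int32)
decoded = ng.ctc_greedy_decoder_seq_len(probs, seq_len)    # blank_index defaults to num_classes - 1

data = ng.parameter([1, 3, 3, 3], dtype=np.float32)
normalized = ng.mvn(data, np.array([2, 3], dtype=np.int64), True, 1e-9, "outside_sqrt")
@endcode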
+ """ + inputs = as_nodes(data, axes) + + attributes = { + "normalize_variance": normalize_variance, + "eps": eps, + "eps_mode": eps_mode + } + + return _get_node_factory_opset6().create("MVN", inputs, attributes) diff --git a/ngraph/python/src/ngraph/opset_utils.py b/ngraph/python/src/ngraph/opset_utils.py index dcd6ebef74cf72..09cb5aab9e5519 100644 --- a/ngraph/python/src/ngraph/opset_utils.py +++ b/ngraph/python/src/ngraph/opset_utils.py @@ -27,7 +27,7 @@ def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory: - """! Return NodeFactory configured to create operators from specified opset version.""" + """Return NodeFactory configured to create operators from specified opset version.""" if opset_version: return NodeFactory(opset_version) else: diff --git a/ngraph/python/src/ngraph/utils/__init__.py b/ngraph/python/src/ngraph/utils/__init__.py index 822c874567c30c..4ec425fca3ed66 100644 --- a/ngraph/python/src/ngraph/utils/__init__.py +++ b/ngraph/python/src/ngraph/utils/__init__.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! Generic utilities. Factor related functions out to separate files.""" +"""Generic utilities. Factor related functions out to separate files.""" diff --git a/ngraph/python/src/ngraph/utils/broadcasting.py b/ngraph/python/src/ngraph/utils/broadcasting.py index 1321ae96fd96e1..a6c52f44560541 100644 --- a/ngraph/python/src/ngraph/utils/broadcasting.py +++ b/ngraph/python/src/ngraph/utils/broadcasting.py @@ -26,7 +26,7 @@ def get_broadcast_axes( output_shape: TensorShape, input_shape: TensorShape, axis: int = None ) -> AxisSet: - """! Generate a list of broadcast axes for ngraph++ broadcast. + """Generate a list of broadcast axes for ngraph++ broadcast. Informally, a broadcast "adds" axes to the input tensor, replicating elements from the input tensor as needed to fill the new dimensions. diff --git a/ngraph/python/src/ngraph/utils/decorators.py b/ngraph/python/src/ngraph/utils/decorators.py index 49a5b5540e9900..6c3ca386ed6b55 100644 --- a/ngraph/python/src/ngraph/utils/decorators.py +++ b/ngraph/python/src/ngraph/utils/decorators.py @@ -27,7 +27,7 @@ def _set_node_friendly_name(node: Node, **kwargs: Any) -> Node: def nameable_op(node_factory_function: Callable) -> Callable: - """! Set the name to the ngraph operator returned by the wrapped function.""" + """Set the name to the ngraph operator returned by the wrapped function.""" @wraps(node_factory_function) def wrapper(*args: Any, **kwargs: Any) -> Node: @@ -39,7 +39,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Node: def unary_op(node_factory_function: Callable) -> Callable: - """! Convert the first input value to a Constant Node if a numeric value is detected.""" + """Convert the first input value to a Constant Node if a numeric value is detected.""" @wraps(node_factory_function) def wrapper(input_value: NodeInput, *args: Any, **kwargs: Any) -> Node: @@ -52,7 +52,7 @@ def wrapper(input_value: NodeInput, *args: Any, **kwargs: Any) -> Node: def binary_op(node_factory_function: Callable) -> Callable: - """! 
Convert the first two input values to Constant Nodes if numeric values are detected.""" + """Convert the first two input values to Constant Nodes if numeric values are detected.""" @wraps(node_factory_function) def wrapper(left: NodeInput, right: NodeInput, *args: Any, **kwargs: Any) -> Node: diff --git a/ngraph/python/src/ngraph/utils/input_validation.py b/ngraph/python/src/ngraph/utils/input_validation.py index e65965a933edbd..2128bee88fdb7f 100644 --- a/ngraph/python/src/ngraph/utils/input_validation.py +++ b/ngraph/python/src/ngraph/utils/input_validation.py @@ -14,7 +14,7 @@ # limitations under the License. # ****************************************************************************** -"""! Helper functions for validating user input.""" +"""Helper functions for validating user input.""" import logging from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type @@ -27,7 +27,7 @@ def assert_list_of_ints(value_list: Iterable[int], message: str) -> None: - """! Verify that the provided value is an iterable of integers.""" + """Verify that the provided value is an iterable of integers.""" try: for value in value_list: if not isinstance(value, int): @@ -39,7 +39,7 @@ def assert_list_of_ints(value_list: Iterable[int], message: str) -> None: def _check_value(op_name, attr_key, value, val_type, cond=None): # type: (str, str, Any, Type, Optional[Callable[[Any], bool]]) -> bool - """! Check whether provided value satisfies specified criteria. + """Check whether provided value satisfies specified criteria. @param op_name: The operator name which attributes are checked. @param attr_key: The attribute name. @@ -67,7 +67,7 @@ def _check_value(op_name, attr_key, value, val_type, cond=None): def check_valid_attribute(op_name, attr_dict, attr_key, val_type, cond=None, required=False): # type: (str, dict, str, Type, Optional[Callable[[Any], bool]], Optional[bool]) -> bool - """! Check whether specified attribute satisfies given criteria. + """Check whether specified attribute satisfies given criteria. @param op_name: The operator name which attributes are checked. @param attr_dict: Dictionary containing key-value attributes to check. @@ -110,7 +110,7 @@ def check_valid_attributes( requirements, # type: List[Tuple[str, bool, Type, Optional[Callable]]] ): # type: (...) -> bool - """! Perform attributes validation according to specified type, value criteria. + """Perform attributes validation according to specified type, value criteria. @param op_name: The operator name which attributes are checked. @param attributes: The dictionary with user provided attributes to check. @@ -130,7 +130,7 @@ def check_valid_attributes( def is_positive_value(x): # type: (Any) -> bool - """! Determine whether the specified x is positive value. + """Determine whether the specified x is positive value. @param x: The value to check. @@ -140,7 +140,7 @@ def is_positive_value(x): # type: (Any) -> bool def is_non_negative_value(x): # type: (Any) -> bool - """! Determine whether the specified x is non-negative value. + """Determine whether the specified x is non-negative value. @param x: The value to check. diff --git a/ngraph/python/src/ngraph/utils/node_factory.py b/ngraph/python/src/ngraph/utils/node_factory.py index bc5ea0eca9d1f3..c75e516a650a74 100644 --- a/ngraph/python/src/ngraph/utils/node_factory.py +++ b/ngraph/python/src/ngraph/utils/node_factory.py @@ -9,10 +9,10 @@ class NodeFactory(object): - """! 
Factory front-end to create node objects.""" + """Factory front-end to create node objects.""" def __init__(self, opset_version: str = DEFAULT_OPSET) -> None: - """! Create the NodeFactory object. + """Create the NodeFactory object. @param opset_version: The opset version the factory will use to produce ops from. """ @@ -24,7 +24,7 @@ def create( arguments: List[Union[Node, Output]], attributes: Optional[Dict[str, Any]] = None, ) -> Node: - """! Create node object from provided description. + """Create node object from provided description. The user does not have to provide all node's attributes, but only required ones. @@ -84,7 +84,7 @@ def _arguments_as_outputs(arguments: List[Union[Node, Output]]) -> List[Output]: @staticmethod def _normalize_attr_name(attr_name: str, prefix: str) -> str: - """! Normalize attribute name. + """Normalize attribute name. @param attr_name: The attribute name. @param prefix: The prefix to attach to attribute name. @@ -98,7 +98,7 @@ def _normalize_attr_name(attr_name: str, prefix: str) -> str: @classmethod def _normalize_attr_name_getter(cls, attr_name: str) -> str: - """! Normalize atr name to be suitable for getter function name. + """Normalize atr name to be suitable for getter function name. @param attr_name: The attribute name to normalize @@ -108,7 +108,7 @@ def _normalize_attr_name_getter(cls, attr_name: str) -> str: @classmethod def _normalize_attr_name_setter(cls, attr_name: str) -> str: - """! Normalize attribute name to be suitable for setter function name. + """Normalize attribute name to be suitable for setter function name. @param attr_name: The attribute name to normalize @@ -118,7 +118,7 @@ def _normalize_attr_name_setter(cls, attr_name: str) -> str: @staticmethod def _get_node_attr_value(node: Node, attr_name: str) -> Any: - """! Get provided node attribute value. + """Get provided node attribute value. @param node: The node we retrieve attribute value from. @param attr_name: The attribute name. @@ -132,7 +132,7 @@ def _get_node_attr_value(node: Node, attr_name: str) -> Any: @staticmethod def _set_node_attr_value(node: Node, attr_name: str, value: Any) -> None: - """! Set the node attribute value. + """Set the node attribute value. @param node: The node we change attribute value for. @param attr_name: The attribute name. diff --git a/ngraph/python/src/ngraph/utils/reduction.py b/ngraph/python/src/ngraph/utils/reduction.py index c8d7a75872d36a..48f5f83cf90337 100644 --- a/ngraph/python/src/ngraph/utils/reduction.py +++ b/ngraph/python/src/ngraph/utils/reduction.py @@ -20,7 +20,7 @@ def get_reduction_axes(node: Node, reduction_axes: Optional[Iterable[int]]) -> Iterable[int]: - """! Get reduction axes if it is None and convert it to set if its type is different. + """Get reduction axes if it is None and convert it to set if its type is different. If reduction_axes is None we default to reduce all axes. diff --git a/ngraph/python/src/ngraph/utils/tensor_iterator_types.py b/ngraph/python/src/ngraph/utils/tensor_iterator_types.py index d29db21b929d2b..3e7d70b952e7bc 100644 --- a/ngraph/python/src/ngraph/utils/tensor_iterator_types.py +++ b/ngraph/python/src/ngraph/utils/tensor_iterator_types.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! 
Helper classes for aggregating TensorIterator input/output desciptor attributes.""" +"""Helper classes for aggregating TensorIterator input/output desciptor attributes.""" from typing import List @@ -22,14 +22,14 @@ class GraphBody(object): - """! Class containing graph parameters and results.""" + """Class containing graph parameters and results.""" def __init__(self, parameters: List[Parameter], results: List[Node],) -> None: self.parameters = parameters self.results = results def serialize(self) -> dict: - """! Serialize GraphBody as a dictionary.""" + """Serialize GraphBody as a dictionary.""" return { "parameters": self.parameters, "results": self.results, @@ -37,14 +37,14 @@ def serialize(self) -> dict: class TensorIteratorInputDesc(object): - """! Represents a generic input descriptor for TensorIterator operator.""" + """Represents a generic input descriptor for TensorIterator operator.""" def __init__(self, input_idx: int, body_parameter_idx: int,) -> None: self.input_idx = input_idx self.body_parameter_idx = body_parameter_idx def serialize(self) -> dict: - """! Serialize TensorIteratorInputDesc as a dictionary.""" + """Serialize TensorIteratorInputDesc as a dictionary.""" return { "input_idx": self.input_idx, "body_parameter_idx": self.body_parameter_idx, @@ -52,7 +52,7 @@ def serialize(self) -> dict: class TensorIteratorSliceInputDesc(TensorIteratorInputDesc): - """! Represents a TensorIterator graph body input formed from slices of TensorIterator input.""" + """Represents a TensorIterator graph body input formed from slices of TensorIterator input.""" def __init__( self, @@ -72,7 +72,7 @@ def __init__( self.axis = axis def serialize(self) -> dict: - """! Serialize TensorIteratorSliceInputDesc as a dictionary.""" + """Serialize TensorIteratorSliceInputDesc as a dictionary.""" output = super().serialize() output["start"] = self.start output["stride"] = self.stride @@ -83,7 +83,7 @@ def serialize(self) -> dict: class TensorIteratorMergedInputDesc(TensorIteratorInputDesc): - """! Represents a TensorIterator graph body input with initial value in the first iteration. + """Represents a TensorIterator graph body input with initial value in the first iteration. Later on, this input value is computed inside graph body. """ @@ -93,28 +93,28 @@ def __init__(self, input_idx: int, body_parameter_idx: int, body_value_idx: int, self.body_value_idx = body_value_idx def serialize(self) -> dict: - """! Serialize TensorIteratorMergedInputDesc as a dictionary.""" + """Serialize TensorIteratorMergedInputDesc as a dictionary.""" output = super().serialize() output["body_value_idx"] = self.body_value_idx return output class TensorIteratorInvariantInputDesc(TensorIteratorInputDesc): - """! Represents a TensorIterator graph body input that has invariant value during iteration.""" + """Represents a TensorIterator graph body input that has invariant value during iteration.""" def __init__(self, input_idx: int, body_parameter_idx: int,) -> None: super().__init__(input_idx, body_parameter_idx) class TensorIteratorOutputDesc(object): - """! Represents a generic output descriptor for TensorIterator operator.""" + """Represents a generic output descriptor for TensorIterator operator.""" def __init__(self, body_value_idx: int, output_idx: int,) -> None: self.body_value_idx = body_value_idx self.output_idx = output_idx def serialize(self) -> dict: - """! 
Serialize TensorIteratorOutputDesc as a dictionary.""" + """Serialize TensorIteratorOutputDesc as a dictionary.""" return { "body_value_idx": self.body_value_idx, "output_idx": self.output_idx, @@ -122,21 +122,21 @@ def serialize(self) -> dict: class TensorIteratorBodyOutputDesc(TensorIteratorOutputDesc): - """! Represents an output from a specific iteration.""" + """Represents an output from a specific iteration.""" def __init__(self, body_value_idx: int, output_idx: int, iteration: int,) -> None: super().__init__(body_value_idx, output_idx) self.iteration = iteration def serialize(self) -> dict: - """! Serialize TensorIteratorBodyOutputDesc as a dictionary.""" + """Serialize TensorIteratorBodyOutputDesc as a dictionary.""" output = super().serialize() output["iteration"] = self.iteration return output class TensorIteratorConcatOutputDesc(TensorIteratorOutputDesc): - """! Represents an output produced by concatenation of output from each iteration.""" + """Represents an output produced by concatenation of output from each iteration.""" def __init__( self, @@ -156,7 +156,7 @@ def __init__( self.axis = axis def serialize(self) -> dict: - """! Serialize TensorIteratorConcatOutputDesc as a dictionary.""" + """Serialize TensorIteratorConcatOutputDesc as a dictionary.""" output = super().serialize() output["start"] = self.start output["stride"] = self.stride diff --git a/ngraph/python/src/ngraph/utils/types.py b/ngraph/python/src/ngraph/utils/types.py index 183484cc8d49c2..ab48e2b44d39e7 100644 --- a/ngraph/python/src/ngraph/utils/types.py +++ b/ngraph/python/src/ngraph/utils/types.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ****************************************************************************** -"""! Functions related to converting between Python and numpy types and ngraph types.""" +"""Functions related to converting between Python and numpy types and ngraph types.""" import logging from typing import List, Union @@ -66,7 +66,7 @@ def get_element_type(data_type: NumericType) -> NgraphType: - """! Return an ngraph element type for a Python type or numpy.dtype.""" + """Return an ngraph element type for a Python type or numpy.dtype.""" if data_type is int: log.warning("Converting int type of undefined bitwidth to 32-bit ngraph integer.") return NgraphType.i32 @@ -85,7 +85,7 @@ def get_element_type(data_type: NumericType) -> NgraphType: def get_element_type_str(data_type: NumericType) -> str: - """! Return an ngraph element type string representation for a Python type or numpy dtype.""" + """Return an ngraph element type string representation for a Python type or numpy dtype.""" if data_type is int: log.warning("Converting int type of undefined bitwidth to 32-bit ngraph integer.") return "i32" @@ -105,7 +105,7 @@ def get_element_type_str(data_type: NumericType) -> str: def get_dtype(ngraph_type: NgraphType) -> np.dtype: - """! Return a numpy.dtype for an ngraph element type.""" + """Return a numpy.dtype for an ngraph element type.""" np_type = next( (np_type for (ng_type, np_type) in ngraph_to_numpy_types_map if ng_type == ngraph_type), None, @@ -118,14 +118,14 @@ def get_dtype(ngraph_type: NgraphType) -> np.dtype: def get_ndarray(data: NumericData) -> np.ndarray: - """! Wrap data into a numpy ndarray.""" + """Wrap data into a numpy ndarray.""" if type(data) == np.ndarray: return data return np.array(data) def get_shape(data: NumericData) -> TensorShape: - """! 
Return a shape of NumericData.""" + """Return a shape of NumericData.""" if type(data) == np.ndarray: return data.shape # type: ignore elif type(data) == list: @@ -134,7 +134,7 @@ def get_shape(data: NumericData) -> TensorShape: def make_constant_node(value: NumericData, dtype: NumericType = None) -> Constant: - """! Return an ngraph Constant node with the specified value.""" + """Return an ngraph Constant node with the specified value.""" ndarray = get_ndarray(value) if dtype: element_type = get_element_type(dtype) @@ -145,7 +145,7 @@ def make_constant_node(value: NumericData, dtype: NumericType = None) -> Constan def as_node(input_value: NodeInput) -> Node: - """! Return input values as nodes. Scalars will be converted to Constant nodes.""" + """Return input values as nodes. Scalars will be converted to Constant nodes.""" if issubclass(type(input_value), Node): return input_value if issubclass(type(input_value), Output): @@ -154,5 +154,5 @@ def as_node(input_value: NodeInput) -> Node: def as_nodes(*input_values: NodeInput) -> List[Node]: - """! Return input values as nodes. Scalars will be converted to Constant nodes.""" + """Return input values as nodes. Scalars will be converted to Constant nodes.""" return [as_node(input_value) for input_value in input_values] diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 053760b209b4ed..9d0f874fc33fa3 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -207,9 +207,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_39704 = xfail_test(reason="ResNet101_DUC_HDC - AssertionError: zoo models results mismatch") -xfail_issue_43208 = xfail_test(reason="GPT-2 - AssertionError: zoo models results mismatch") -xfail_issue_43209 = xfail_test(reason="GPT-2-LM-HEAD - AssertionError: zoo models results mismatch") -xfail_issue_43213 = xfail_test(reason="RetinaNet Resnet101 - AssertionError: zoo models results mismatch") xfail_issue_37973 = xfail_test(reason="TF Inception V2 - AssertionError: zoo models results mismatch") xfail_issue_47430 = xfail_test(reason="FCN ResNet models - AssertionError: zoo models results mismatch") xfail_issue_47495 = xfail_test(reason="BertSquad-10 from MSFT - AssertionError: zoo models results mismatch") diff --git a/ngraph/python/tests/test_ngraph/test_create_op.py b/ngraph/python/tests/test_ngraph/test_create_op.py index 9caa81cc0d2146..eda402201dfb9b 100644 --- a/ngraph/python/tests/test_ngraph/test_create_op.py +++ b/ngraph/python/tests/test_ngraph/test_create_op.py @@ -77,6 +77,46 @@ def test_ctc_greedy_decoder(dtype): assert list(node.get_output_shape(0)) == expected_shape +@pytest.mark.parametrize("fp_dtype, int_dtype, int_ci, int_sl, merge_repeated, blank_index", + [ + (np.float32, np.int32, "i32", "i32", True, True), + (np.float32, np.int32, "i64", "i32", True, True), + (np.float32, np.int32, "i32", "i64", True, True), + (np.float32, np.int32, "i64", "i64", True, True), + (np.float64, np.int64, "i32", "i32", False, True), + (np.float64, np.int64, "i64", "i32", False, True), + (np.float64, np.int64, "i32", "i64", False, True), + (np.float64, np.int64, "i64", "i64", False, True), + (np.float32, np.int32, "i32", "i32", True, False), + (np.float32, np.int32, "i64", "i32", True, False), + (np.float32, np.int32, "i32", "i64", True, False), + (np.float32, np.int32, "i64", "i64", True, False), + (np.float64, np.int64, "i32", "i32", False, False), + (np.float64, np.int64, "i64", "i32", False, False), + (np.float64, np.int64, 
"i32", "i64", False, False), + (np.float64, np.int64, "i64", "i64", False, False) + ],) +def test_ctc_greedy_decoder_seq_len(fp_dtype, int_dtype, int_ci, int_sl, merge_repeated, blank_index): + input0_shape = [8, 20, 128] + input1_shape = [8] + input2_shape = [1] + expected_shape = [8, 20] + + parameter_input0 = ng.parameter(input0_shape, name="Input0", dtype=fp_dtype) + parameter_input1 = ng.parameter(input1_shape, name="Input1", dtype=int_dtype) + parameter_input2 = None + if blank_index: + parameter_input2 = ng.parameter(input2_shape, name="Input2", dtype=int_dtype) + + node = ng.ctc_greedy_decoder_seq_len( + parameter_input0, parameter_input1, parameter_input2, merge_repeated, int_ci, int_sl + ) + + assert node.get_type_name() == "CTCGreedyDecoderSeqLen" + assert node.get_output_size() == 2 + assert list(node.get_output_shape(0)) == expected_shape + + @pytest.mark.parametrize("dtype", np_types) def test_deformable_convolution(dtype): strides = np.array([1, 1]) diff --git a/ngraph/python/tests/test_ngraph/test_normalization.py b/ngraph/python/tests/test_ngraph/test_normalization.py index e792ebae483ef3..7a8d23f133a821 100644 --- a/ngraph/python/tests/test_ngraph/test_normalization.py +++ b/ngraph/python/tests/test_ngraph/test_normalization.py @@ -115,3 +115,43 @@ def test_batch_norm_inference(): result = run_op_node([data, gamma, beta, mean, variance], ng.batch_norm_inference, epsilon) assert np.allclose(result, excepted) + + +def test_mvn_no_variance(): + data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32).reshape([1, 3, 3, 3]) + axes = np.array([2, 3], dtype=np.int64) + epsilon = 1e-9 + normalize_variance = False + eps_mode = "outside_sqrt" + excepted = np.array([-4, -3, -2, -1, 0, 1, 2, 3, 4, + -4, -3, -2, -1, 0, 1, 2, 3, 4, + -4, -3, -2, -1, 0, 1, 2, 3, 4], dtype=np.float32).reshape([1, 3, 3, 3]) + + result = run_op_node([data], ng.mvn, axes, normalize_variance, epsilon, eps_mode) + + assert np.allclose(result, excepted) + + +def test_mvn(): + data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32).reshape([1, 3, 3, 3]) + axes = np.array([2, 3], dtype=np.int64) + epsilon = 1e-9 + normalize_variance = True + eps_mode = "outside_sqrt" + excepted = np.array([-1.5491934, -1.161895, -0.7745967, + -0.38729835, 0., 0.38729835, + 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, + -0.38729835, 0., 0.38729835, + 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, + -0.38729835, 0., 0.38729835, + 0.7745967, 1.161895, 1.5491934], dtype=np.float32).reshape([1, 3, 3, 3]) + + result = run_op_node([data], ng.mvn, axes, normalize_variance, epsilon, eps_mode) + + assert np.allclose(result, excepted) diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index 6f56f0fac3e79e..5deda8cdf9e9fb 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -235,8 +235,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_argmin_negative_axis_keepdims_random_select_last_index_cpu", "OnnxBackendNodeModelTest.test_argmin_no_keepdims_random_select_last_index_cpu"), (xfail_issue_38091, - "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu", - "OnnxBackendNodeModelTest.test_mvn_cpu",), + "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu"), (xfail_issue_47317, 
"OnnxBackendPyTorchOperatorModelTest.test_operator_add_size1_broadcast_cpu", "OnnxBackendPyTorchOperatorModelTest.test_operator_add_size1_singleton_broadcast_cpu",), @@ -682,7 +681,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_squeeze_cpu", "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",), (xfail_issue_44976, - "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu",) + "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu",), ] for test_group in tests_expected_to_fail: diff --git a/ngraph/python/tests/test_onnx/test_zoo_models.py b/ngraph/python/tests/test_onnx/test_zoo_models.py index 9b1bf9550c8972..643c95eb0bbac2 100644 --- a/ngraph/python/tests/test_onnx/test_zoo_models.py +++ b/ngraph/python/tests/test_onnx/test_zoo_models.py @@ -36,9 +36,6 @@ xfail_issue_38726, xfail_issue_40686, xfail_issue_39704, - xfail_issue_43208, - xfail_issue_43209, - xfail_issue_43213, xfail_issue_37973, xfail_issue_47430, xfail_issue_47495) @@ -127,6 +124,9 @@ def tinyyolov3_post_processing(outputs : Sequence[Any]) -> Sequence[Any]: "yolov4": {"atol": 1e-04, "rtol": 0.001}, "tinyyolov3": {"atol": 1e-04, "rtol": 0.001}, "tiny-yolov3-11": {"atol": 1e-04, "rtol": 0.001}, + "GPT2": {"atol": 5e-06, "rtol": 0.01}, + "GPT-2-LM-HEAD": {"atol": 4e-06}, + "test_retinanet_resnet101": {"atol": 1.3e-06}, } zoo_models = [] @@ -177,9 +177,6 @@ def tinyyolov3_post_processing(outputs : Sequence[Any]) -> Sequence[Any]: execution_xfail_list = [ # ONNX Model Zoo (xfail_issue_39704, "test_onnx_model_zoo_vision_object_detection_segmentation_duc_model_ResNet101_DUC_7_ResNet101_DUC_HDC_ResNet101_DUC_HDC_cpu"), - (xfail_issue_43213, "test_onnx_model_zoo_vision_object_detection_segmentation_retinanet_model_retinanet_9_test_retinanet_resnet101_retinanet_9_cpu"), - (xfail_issue_43208, "test_onnx_model_zoo_text_machine_comprehension_gpt_2_model_gpt2_10_GPT2_model_cpu"), - (xfail_issue_43209, "test_onnx_model_zoo_text_machine_comprehension_gpt_2_model_gpt2_lm_head_10_GPT_2_LM_HEAD_model_cpu"), (xfail_issue_40957, "test_onnx_model_zoo_text_machine_comprehension_roberta_model_roberta_base_11_roberta_base_11_roberta_base_11_cpu"), (xfail_issue_40957, "test_onnx_model_zoo_text_machine_comprehension_bert_squad_model_bertsquad_8_download_sample_8_bertsquad8_cpu"), (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_encoder_12_t5_encoder_cpu"), diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 8465d01bc90c5c..fc7f38e31deccd 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -367,7 +367,8 @@ if (NGRAPH_ONNX_IMPORT_ENABLE AND NOT NGRAPH_USE_PROTOBUF_LITE) list(APPEND SRC onnx/onnx_import_exceptions.cpp onnx/onnx_import_library.cpp - onnx/onnx_editor.cpp) + onnx/onnx_editor.cpp + onnx/onnx_tensor_names.cpp) endif() foreach(BACKEND_NAME ${ACTIVE_BACKEND_LIST}) diff --git a/ngraph/test/attributes.cpp b/ngraph/test/attributes.cpp index e64960316936b5..f94497f258533b 100644 --- a/ngraph/test/attributes.cpp +++ b/ngraph/test/attributes.cpp @@ -517,8 +517,8 @@ TEST(attributes, max_pool_op) TEST(attributes, mod_op) { NodeBuilder::get_ops().register_factory(); - auto A = make_shared(element::f32, Shape{0, 2}); - auto B = make_shared(element::f32, Shape{2, 0}); + auto A = make_shared(element::f32, Shape{1, 2}); + auto B = make_shared(element::f32, Shape{2, 1}); auto auto_broadcast = op::AutoBroadcastType::NUMPY; diff --git a/ngraph/test/backend/convolution.in.cpp b/ngraph/test/backend/convolution.in.cpp index 
752d5916f9326b..731f91143b2001 100644 --- a/ngraph/test/backend/convolution.in.cpp +++ b/ngraph/test/backend/convolution.in.cpp @@ -20,8 +20,10 @@ #include "runtime/backend.hpp" #include "util/all_close.hpp" #include "util/all_close_f.hpp" +#include "util/engine/test_engines.hpp" #include "util/known_element_types.hpp" #include "util/ndarray.hpp" +#include "util/test_case.hpp" #include "util/test_control.hpp" #include "util/test_tools.hpp" @@ -29,7 +31,1034 @@ using namespace std; using namespace ngraph; static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); +static void ConvolutionTest(const std::vector<float>& inputs, + const Shape inputs_shape, + const std::vector<float>& filters, + const Shape filter_shape, + const std::vector<float>& outputs, + const Shape outputs_shape, + const Strides& strides, + const CoordinateDiff& padding, + const Strides& dilations) +{ + const CoordinateDiff pads_begin{padding}; + const CoordinateDiff pads_end{padding}; + const op::PadType auto_pad{op::PadType::EXPLICIT}; + + auto inputs_param = make_shared<op::Parameter>(element::f32, inputs_shape); + auto filters_param = make_shared<op::Parameter>(element::f32, filter_shape); + auto conv = make_shared<op::v1::Convolution>( + inputs_param, filters_param, strides, pads_begin, pads_end, dilations, auto_pad); + auto f = make_shared<Function>(conv, ParameterVector{inputs_param, filters_param}); + + auto test_case = test::TestCase<TestEngine>(f); + test_case.add_input<float>(inputs); + test_case.add_input<float>(filters); + test_case.add_expected_output<float>(outputs_shape, outputs); + test_case.run(); +} + +// --------------------- 1D convolution ------------------------------------------ +// clang-format off +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_1channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 1, 6}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector<float> filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 4}; + const std::vector<float> outputs{5.0f, 6.0f, 7.0f, 2.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_1channel_padding) +{ + const Strides strides{1}; + const CoordinateDiff padding{1}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 1, 4}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector<float> filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 4}; + const std::vector<float> outputs{3.0f, 5.0f, 6.0f, 6.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_1channel_stride) +{ + const Strides strides{2}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 1, 5}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector<float> filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 2}; + const std::vector<float> outputs{5.0f, 7.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_1channel_dilation) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{2}; + + const Shape inputs_shape{1, 1, 7}; + const 
std::vector inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, 3.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 3}; + const std::vector outputs{3.0f, 8.0f, 9.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_1channel_padding_stride_dilation) +{ + const Strides strides{2}; + const CoordinateDiff padding{2}; + const Strides dilations{2}; + + const Shape inputs_shape{1, 1, 7}; + const std::vector inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, 3.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 4}; + const std::vector outputs{3.0f, 3.0f, 9.0f, 2.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_2channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 2, 4}; + const std::vector inputs{ + // channel 1 + 1.0f, 3.0f, 2.0f, 1.0f, + // channel 2 + 2.0f, 2.0f, 3.0f, 1.0f}; + + const Shape filter_shape{1, 2, 3}; + const std::vector filters{ + // channel 1 + 2.0f, 0.0f, 1.0f, + // channel 2 + 1.0f, 0.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 2}; + const std::vector outputs{12.0f, 11.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_1batch_2filter) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 1, 4}; + const std::vector inputs{1.0f, 3.0f, 2.0f, 1.0f}; + + const Shape filter_shape{2, 1, 3}; + const std::vector filters{ + // filter 1 + 2.0f, 0.0f, 1.0f, + // filter 2 + 1.0f, 0.0f, 2.0f}; + + const Shape outputs_shape{1, 2, 2}; + const std::vector outputs{ + // channel 1 + 4.0f, 7.0f, + // channel 2 + 5.0f, 5.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_1D_2batch_1channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{2, 1, 4}; + const std::vector inputs{ + // batch 1 + 1.0f, 3.0f, 2.0f, 1.0f, + // batch 2 + 2.0f, 2.0f, 3.0f, 1.0f}; + + const Shape filter_shape{1, 1, 3}; + const std::vector filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{2, 1, 2}; + const std::vector outputs{ + // batch 1 + 4.0f, 7.0f, + // batch 2 + 7.0f, 5.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +// --------------------- 2D convolution ------------------------------------------ +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_1channel) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{1, 1, 4, 4}; + const std::vector inputs{1.0f, 3.0f, 5.0f, 7.0f, + 7.0f, 5.0f, 3.0f, 1.0f, + 2.0f, 4.0f, 6.0f, 8.0f, + 8.0f, 6.0f, 4.0f, 2.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 3.0f, 2.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 2, 2}; + const std::vector outputs{47.0f, 69.0f, + 70.0f, 48.0f}; + + 
ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_1channel_padding) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{1, 1}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{1, 1, 4, 4}; + const std::vector inputs{1.0f, 3.0f, 5.0f, 7.0f, + 7.0f, 5.0f, 3.0f, 1.0f, + 2.0f, 4.0f, 6.0f, 8.0f, + 8.0f, 6.0f, 4.0f, 2.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 4, 4}; + const std::vector outputs{18.0f, 28.0f, 20.0f, 14.0f, + 28.0f, 47.0f, 67.0f, 40.0f, + 51.0f, 60.0f, 40.0f, 23.0f, + 24.0f, 34.0f, 44.0f, 24.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_1channel_stride) +{ + const Strides strides{2, 2}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{1, 1, 5, 5}; + const std::vector inputs{1.0f, 3.0f, 5.0f, 7.0f, 9.0f, + 7.0f, 5.0f, 3.0f, 1.0f, 0.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, + 8.0f, 6.0f, 4.0f, 2.0f, 0.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{1.0f, 2.0f, 3.0f, + 1.0f, 1.0f, 1.0f, + 3.0f, 2.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 2, 2}; + const std::vector outputs{57.0f, 94.0f, + 66.0f, 102.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_1channel_dilation) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{2, 2}; + + const Shape inputs_shape{1, 1, 7, 7}; + const std::vector inputs{1.0f, 3.0f, 5.0f, 7.0f, 9.0f, 11.0f, 13.0f, + 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, + 8.0f, 6.0f, 4.0f, 2.0f, 0.0f, -2.0f, -4.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, + 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, + 8.0f, 6.0f, 4.0f, 2.0f, 0.0f, -2.0f, -4.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{1.0f, 2.0f, 3.0f, + 1.0f, 1.0f, 0.0f, + 3.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 3, 3}; + const std::vector outputs{78.0f, 106.0f, 134.0f, + 44.0f, 16.0f, -12.0f, + 80.0f, 84.0f, 88.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_1channel_padding_strides_dilation) +{ + const Strides strides{2, 2}; + const CoordinateDiff padding{2, 2}; + const Strides dilations{2, 2}; + + const Shape inputs_shape{1, 1, 7, 7}; + const std::vector inputs{1.0f, 3.0f, 5.0f, 7.0f, 9.0f, 11.0f, 13.0f, + 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, + 8.0f, 6.0f, 4.0f, 2.0f, 0.0f, -2.0f, -4.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, + 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, + 8.0f, 6.0f, 4.0f, 2.0f, 0.0f, -2.0f, -4.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{1.0f, 2.0f, 3.0f, + 1.0f, 1.0f, 0.0f, + 3.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 4, 4}; + const std::vector outputs{15.0f, 38.0f, 70.0f, 66.0f, + 33.0f, 78.0f, 134.0f, 103.0f, + 40.0f, 80.0f, 88.0f, 58.0f, + 30.0f, 56.0f, 72.0f, 34.0f}; + + 
ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_2channel) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{1, 2, 4, 4}; + const std::vector inputs{ + // channel 1 + 1.0f, 3.0f, 5.0f, 7.0f, + 7.0f, 5.0f, 3.0f, 1.0f, + 2.0f, 4.0f, 6.0f, 8.0f, + 8.0f, 6.0f, 4.0f, 2.0f, + // channel 2 + -1.0f, 3.0f, -5.0f, 7.0f, + 7.0f, -5.0f, 3.0f, -1.0f, + -2.0f, 4.0f, -6.0f, 8.0f, + 8.0f, -6.0f, 4.0f, -2.0f}; + + const Shape filter_shape{1, 2, 3, 3}; + const std::vector filters{ + // channel 1 + 5.0f, 3.0f, 5.0f, + 1.0f, 3.0f, 1.0f, + 4.0f, 2.0f, 4.0f, + // channel 2 + -5.0f, 3.0f, 5.0f, + 1.0f, -3.0f, 1.0f, + 4.0f, 2.0f, -4.0f}; + + const Shape outputs_shape{1, 1, 2, 2}; + const std::vector outputs{142.0f, 102.0f, + 94.0f, 160.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_1batch_2filter) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{1, 1, 4, 4}; + const std::vector inputs{ + 1.0f, 3.0f, 5.0f, 7.0f, + 7.0f, 5.0f, 3.0f, 1.0f, + 2.0f, 4.0f, 6.0f, 8.0f, + 8.0f, 6.0f, 4.0f, 2.0f}; + + const Shape filter_shape{2, 1, 3, 3}; + const std::vector filters{ + // channel 1 + 5.0f, 3.0f, 5.0f, + 1.0f, 3.0f, 1.0f, + 4.0f, 2.0f, 4.0f, + // channel 2 + -5.0f, 3.0f, 5.0f, + 1.0f, -3.0f, 1.0f, + 4.0f, 2.0f, -4.0f}; + + const Shape outputs_shape{1, 2, 2, 2}; + const std::vector outputs{ + // channel 1 + 104.0f, 140.0f, + 145.0f, 109.0f, + // channel 2 + 16.0f, 28.0f, + 19.0f, 7.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_2D_2batch_1channel) +{ + const Strides strides{1, 1}; + const CoordinateDiff padding{0, 0}; + const Strides dilations{1, 1}; + + const Shape inputs_shape{2, 1, 4, 4}; + const std::vector inputs{ + // batch 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // batch 2 + -1.0f, 3.0f, 2.0f, -1.0f, + 1.0f, 3.0f, -3.0f, 1.0f, + -2.0f, -1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, -3.0f}; + + const Shape filter_shape{1, 1, 3, 3}; + const std::vector filters{-5.0f, 3.0f, 5.0f, + 1.0f, -3.0f, 1.0f, + 4.0f, 2.0f, -4.0f}; + + const Shape outputs_shape{2, 1, 2, 2}; + const std::vector outputs{ + // batch 1 + 15.0f, -15.0f, + 23.0f, 2.0f, + // batch 2 + -1.0f, -15.0f, + -5.0f, 6.0f}; + + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +// --------------------- 3D convolution ------------------------------------------ +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_1channel) +{ + const Strides strides{1, 1, 1}; + const CoordinateDiff padding{0, 0, 0}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{1, 1, 4, 4, 4}; + const std::vector inputs{ + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 
1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f + }; + + const Shape filter_shape{1, 1, 3, 3, 3}; + const std::vector filters{ + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 2, 2, 2}; + const std::vector outputs{ + // depth: 1 + 69.0f, 66.0f, + 93.0f, 78.0f, + // depth: 2 + 69.0f, 66.0f, + 93.0f, 78.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_1channel_padding) +{ + const Strides strides{1, 1, 1}; + const CoordinateDiff padding{1, 1, 1}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{1, 1, 4, 4, 4}; + const std::vector inputs{ + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f + }; + + const Shape filter_shape{1, 1, 3, 3, 3}; + const std::vector filters{ + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 4, 4, 4}; + const std::vector outputs{ + // depth: 1 + 16.0f, 28.0f, 26.0f, 16.0f, + 32.0f, 46.0f, 44.0f, 20.0f, + 40.0f, 62.0f, 52.0f, 34.0f, + 20.0f, 18.0f, 30.0f, 20.0f, + // depth: 2 + 24.0f, 42.0f, 39.0f, 24.0f, + 48.0f, 69.0f, 66.0f, 30.0f, + 60.0f, 93.0f, 78.0f, 51.0f, + 30.0f, 27.0f, 45.0f, 30.0f, + // depth: 3 + 24.0f, 42.0f, 39.0f, 24.0f, + 48.0f, 69.0f, 66.0f, 30.0f, + 60.0f, 93.0f, 78.0f, 51.0f, + 30.0f, 27.0f, 45.0f, 30.0f, + // depth: 4 + 16.0f, 28.0f, 26.0f, 16.0f, + 32.0f, 46.0f, 44.0f, 20.0f, + 40.0f, 62.0f, 52.0f, 34.0f, + 20.0f, 18.0f, 30.0f, 20.0f,}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_1channel_stride) +{ + const Strides strides{2, 2, 2}; + const CoordinateDiff padding{0, 0, 0}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{1, 1, 5, 5, 5}; + const std::vector inputs{ + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + // depth: 5 + 1.0f, 3.0f, 2.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 3.0f, 2.0f, 3.0f, 
3.0f, 2.0f, + }; + + const Shape filter_shape{1, 1, 3, 3, 3}; + const std::vector filters{ + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 2, 2, 2}; + const std::vector outputs{ + // depth: 1 + 69.0f, 60.0f, + 69.0f, 87.0f, + // depth: 2 + 69.0f, 60.0f, + 69.0f, 87.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_1channel_padding_strides_dilation) +{ + const Strides strides{2, 2, 2}; + const CoordinateDiff padding{2, 2, 2}; + const Strides dilations{2, 2, 2}; + + const Shape inputs_shape{1, 1, 7, 7, 7}; + const std::vector inputs{ + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 5 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 6 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + // depth: 7 + 1.0f, 3.0f, 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 3.0f, + 2.0f, 1.0f, 1.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.0f, + }; + + const Shape filter_shape{1, 1, 3, 3, 3}; + const std::vector filters{ + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{1, 1, 4, 4, 4}; + const std::vector outputs{ + // depth: 1 + 10.0f, 18.0f, 20.0f, 
16.0f, + 38.0f, 40.0f, 54.0f, 30.0f, + 38.0f, 42.0f, 52.0f, 30.0f, + 36.0f, 30.0f, 30.0f, 20.0f, + // depth: 2 + 15.0f, 27.0f, 30.0f, 24.0f, + 57.0f, 60.0f, 81.0f, 45.0f, + 57.0f, 63.0f, 78.0f, 45.0f, + 54.0f, 45.0f, 45.0f, 30.0f, + // depth: 3 + 15.0f, 27.0f, 30.0f, 24.0f, + 57.0f, 60.0f, 81.0f, 45.0f, + 57.0f, 63.0f, 78.0f, 45.0f, + 54.0f, 45.0f, 45.0f, 30.0f, + // depth: 4 + 10.0f, 18.0f, 20.0f, 16.0f, + 38.0f, 40.0f, 54.0f, 30.0f, + 38.0f, 42.0f, 52.0f, 30.0f, + 36.0f, 30.0f, 30.0f, 20.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_2channel) +{ + const Strides strides{1, 1, 1}; + const CoordinateDiff padding{0, 0, 0}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{1, 2, 4, 4, 4}; + const std::vector inputs{ + // -- channel 1 -- + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // -- channel 2 -- + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f + }; + + const Shape filter_shape{1, 2, 3, 3, 3}; + const std::vector filters{ + // -- channel 1 -- + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // -- channel 2 -- + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f + }; + + const Shape outputs_shape{1, 1, 2, 2, 2}; + const std::vector outputs{ + // depth: 1 + 138.0f, 132.0f, + 186.0f, 156.0f, + // depth: 2 + 138.0f, 132.0f, + 186.0f, 156.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_1batch_2filter) +{ + const Strides strides{1, 1, 1}; + const CoordinateDiff padding{0, 0, 0}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{1, 1, 4, 4, 4}; + const std::vector inputs{ + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f}; + + const Shape filter_shape{2, 1, 3, 3, 3}; + const std::vector filters{ + // -- filter 1 -- + // 
depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // -- filter 2 -- + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f + }; + + const Shape outputs_shape{1, 2, 2, 2, 2}; + const std::vector outputs{ + // -- out 1 -- + // depth: 1 + 69.0f, 66.0f, + 93.0f, 78.0f, + // depth: 2 + 69.0f, 66.0f, + 93.0f, 78.0f, + // -- out 2 -- + // depth: 1 + 69.0f, 66.0f, + 93.0f, 78.0f, + // depth: 2 + 69.0f, 66.0f, + 93.0f, 78.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_3D_2batch_1channel) +{ + const Strides strides{1, 1, 1}; + const CoordinateDiff padding{0, 0, 0}; + const Strides dilations{1, 1, 1}; + + const Shape inputs_shape{2, 1, 4, 4, 4}; + const std::vector inputs{ + // -- batch 1 -- + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // -- batch 2 -- + // depth: 1 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 2 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 3 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f, + // depth: 4 + 1.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 3.0f, 3.0f, 1.0f, + 2.0f, 1.0f, 1.0f, 3.0f, + 3.0f, 2.0f, 3.0f, 3.0f}; + + const Shape filter_shape{1, 1, 3, 3, 3}; + const std::vector filters{ + // depth: 1 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 2 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f, + // depth: 3 + 1.0f, 2.0f, 3.0f, + 0.0f, 1.0f, 0.0f, + 2.0f, 1.0f, 2.0f}; + + const Shape outputs_shape{2, 1, 2, 2, 2}; + const std::vector outputs{ + // -- batch 1 -- + // depth: 1 + 69.0f, 66.0f, + 93.0f, 78.0f, + // depth: 2 + 69.0f, 66.0f, + 93.0f, 78.0f, + // -- batch 2 -- + // depth: 1 + 69.0f, 66.0f, + 93.0f, 78.0f, + // depth: 2 + 69.0f, 66.0f, + 93.0f, 78.0f}; + + ConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} +// ---------------------- other tests ------------------------------------------ +// clang-format on NGRAPH_TEST(${BACKEND_NAME}, convolution_outlining) { Shape shape_a{1, 2, 2, 2}; diff --git a/ngraph/test/backend/gather_elements.in.cpp b/ngraph/test/backend/gather_elements.in.cpp index affb59860926bb..5c9544965e975e 100644 --- a/ngraph/test/backend/gather_elements.in.cpp +++ b/ngraph/test/backend/gather_elements.in.cpp @@ -368,15 +368,15 @@ NGRAPH_TEST(${BACKEND_NAME}, evaluate_2D_gather_elements_2x2x1_data_float32) test_case.add_input(data); test_case.add_input(indices); - test_case.add_expected_output(vector{5, - 4, - 1, - 4, - - 1, - 4, - 5, - 4}); + test_case.add_expected_output({5, + 4, + 1, + 4, + + 1, 
+ 4, + 5, + 4}); // clang-format on test_case.run(); } diff --git a/ngraph/test/backend/group_convolution.in.cpp b/ngraph/test/backend/group_convolution.in.cpp index d371326c80ce88..848709abe9fa58 100644 --- a/ngraph/test/backend/group_convolution.in.cpp +++ b/ngraph/test/backend/group_convolution.in.cpp @@ -34,6 +34,131 @@ static string s_manifest = "${MANIFEST}"; using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); +static void GroupConvolutionTest(const std::vector<float>& inputs, + const Shape inputs_shape, + const std::vector<float>& filters, + const Shape filter_shape, + const std::vector<float>& outputs, + const Shape outputs_shape, + const Strides& strides, + const CoordinateDiff& padding, + const Strides& dilations) +{ + const CoordinateDiff pads_begin{padding}; + const CoordinateDiff pads_end{padding}; + const op::PadType auto_pad{op::PadType::EXPLICIT}; + + auto inputs_param = make_shared<op::Parameter>(element::f32, inputs_shape); + auto filters_param = make_shared<op::Parameter>(element::f32, filter_shape); + auto conv = make_shared<op::v1::GroupConvolution>( + inputs_param, filters_param, strides, pads_begin, pads_end, dilations, auto_pad); + auto f = make_shared<Function>(conv, ParameterVector{inputs_param, filters_param}); + + auto test_case = test::TestCase<TestEngine>(f); + test_case.add_input<float>(inputs); + test_case.add_input<float>(filters); + test_case.add_expected_output<float>(outputs_shape, outputs); + test_case.run(); +} + +// --------------------- 1D group convolution ------------------------------------------ +// clang-format off +NGRAPH_TEST(${BACKEND_NAME}, group_convolution_1D_1group_1batch_1channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 1, 6}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f}; + + const Shape filter_shape{1, 1, 1, 3}; + const std::vector<float> filters{2.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 1, 4}; + const std::vector<float> outputs{5.0f, 6.0f, 7.0f, 2.0f}; + + GroupConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, group_convolution_1D_2group_1batch_2channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 2, 6}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f}; + + const Shape filter_shape{2, 1, 1, 3}; + const std::vector<float> filters{1.0f, 0.0f, 3.0f, + 3.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{1, 2, 4}; + const std::vector<float> outputs{10.0f, 3.0f, 6.0f, 6.0f, + 6.0f, 9.0f, 10.0f, 2.0f}; + + GroupConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, group_convolution_1D_2group_1batch_2_filters_2channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{1, 2, 6}; + const std::vector<float> inputs{1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, + -1.0f, -3.0f, -3.0f, 0.0f, 1.0f, 2.0f}; + + const Shape filter_shape{2, 2, 1, 3}; + const std::vector<float> filters{1.0f, 0.0f, 3.0f, + 3.0f, 0.0f, 1.0f, + -3.0f, 0.0f, 1.0f, + 3.0f, 2.0f, -1.0f}; + + const Shape outputs_shape{1, 4, 4}; + const std::vector<float> outputs{10.0f, 3.0f, 6.0f, 6.0f, + 6.0f, 9.0f, 10.0f, 2.0f, + 0.0f, 9.0f, 10.0f, 2.0f, + -6.0f, -15.0f, -10.0f, 0.0f}; + + GroupConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} + +NGRAPH_TEST(${BACKEND_NAME}, 
group_convolution_1D_2group_2batch_2channel) +{ + const Strides strides{1}; + const CoordinateDiff padding{0}; + const Strides dilations{1}; + + const Shape inputs_shape{2, 2, 6}; + const std::vector inputs{// -- batch 1 -- + 1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, + // -- batch 2 -- + 1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f, + 1.0f, 3.0f, 3.0f, 0.0f, 1.0f, 2.0f}; + + const Shape filter_shape{2, 1, 1, 3}; + const std::vector filters{1.0f, 0.0f, 3.0f, + 3.0f, 0.0f, 1.0f}; + + const Shape outputs_shape{2, 2, 4}; + const std::vector outputs{10.0f, 3.0f, 6.0f, 6.0f, + 6.0f, 9.0f, 10.0f, 2.0f, + 10.0f, 3.0f, 6.0f, 6.0f, + 6.0f, 9.0f, 10.0f, 2.0f}; + + GroupConvolutionTest(inputs, inputs_shape, filters, filter_shape, outputs, outputs_shape, + strides, padding, dilations); +} +// // clang-format on + NGRAPH_TEST(${BACKEND_NAME}, dyn_group_convolution_backprop_data) { Shape shape_filter{6, 1, 3, 3}; diff --git a/ngraph/test/build_graph.cpp b/ngraph/test/build_graph.cpp index 1279735c806d63..1cd93e0511236c 100644 --- a/ngraph/test/build_graph.cpp +++ b/ngraph/test/build_graph.cpp @@ -22,6 +22,7 @@ #include "util/test_tools.hpp" #include +#include NGRAPH_SUPPRESS_DEPRECATED_START @@ -108,7 +109,7 @@ TEST(build_graph, function_undeclared_parameters) } catch (const ngraph_error& error) { - EXPECT_EQ(error.what(), std::string("Function references undeclared parameter")); + EXPECT_HAS_SUBSTRING(error.what(), std::string("Function references undeclared parameter")); } catch (...) { @@ -363,3 +364,91 @@ TEST(build_graph, build_graph_with_remove_result) nodes = f->get_ops(); EXPECT_EQ(nodes.size(), 5); } + +TEST(build_graph, build_graph_with_add_parameter) +{ + auto arg = make_shared(element::f32, Shape{2, 4}); + auto arg2 = make_shared(element::f32, Shape{2, 2}); + auto init_const = op::Constant::create(element::f32, Shape{2, 2}, {0, 0, 0, 0}); + auto read = make_shared(init_const, "v0"); + std::vector> args = {arg, read}; + auto pattern = make_shared(args, 1); + auto res = make_shared(pattern); + const auto axis = op::Constant::create(element::i64, Shape{}, {1}); + auto crop = make_shared(pattern, axis, 3); + auto res2 = make_shared(crop, "v0"); + + auto f = make_shared(ResultVector({res, res2}), ParameterVector{arg}); + + NodeVector nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 8); + ParameterVector params = f->get_parameters(); + EXPECT_EQ(params.size(), 1); + + pattern->input(1).replace_source_output(arg2->output(0)); + + f->add_parameters(ParameterVector({arg2})); + params = f->get_parameters(); + EXPECT_EQ(params.size(), 2); + EXPECT_EQ(params[1], arg2); + nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 7); +} + +TEST(build_graph, build_graph_with_remove_parameter) +{ + auto arg = make_shared(element::f32, Shape{2, 4}); + auto arg2 = make_shared(element::f32, Shape{2, 2}); + auto init_const = op::Constant::create(element::f32, Shape{2, 2}, {0, 0, 0, 0}); + auto read = make_shared(init_const, "v0"); + std::vector> args = {arg, arg2}; + auto pattern = make_shared(args, 1); + auto res = make_shared(pattern); + const auto axis = op::Constant::create(element::i64, Shape{}, {1}); + auto crop = make_shared(pattern, axis, 3); + auto res2 = make_shared(crop, "v0"); + + auto f = make_shared(ResultVector({res, res2}), ParameterVector{arg, arg2}); + + NodeVector nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 7); + ParameterVector params = f->get_parameters(); + EXPECT_EQ(params.size(), 2); + + pattern->input(1).replace_source_output(read->output(0)); + f->remove_parameter(arg2); 
+ params = f->get_parameters(); + EXPECT_EQ(params.size(), 1); + nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 8); +} + +TEST(build_graph, build_graph_with_remove_parameter_indexing) +{ + auto arg = make_shared(element::f32, Shape{2, 4}); + auto arg2 = make_shared(element::f32, Shape{2, 2}); + auto init_const = op::Constant::create(element::f32, Shape{2, 2}, {0, 0, 0, 0}); + auto read = make_shared(init_const, "v0"); + std::vector> args = {arg2, arg}; + auto pattern = make_shared(args, 1); + auto res = make_shared(pattern); + const auto axis = op::Constant::create(element::i64, Shape{}, {1}); + auto crop = make_shared(pattern, axis, 3); + auto res2 = make_shared(crop, "v0"); + + auto f = make_shared(ResultVector({res, res2}), ParameterVector{arg2, arg}); + + NodeVector nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 7); + ParameterVector params = f->get_parameters(); + EXPECT_EQ(params.size(), 2); + + pattern->input(0).replace_source_output(read->output(0)); + f->remove_parameter(arg2); + params = f->get_parameters(); + EXPECT_EQ(params.size(), 1); + nodes = f->get_ops(); + EXPECT_EQ(nodes.size(), 8); + + f->validate_nodes_and_infer_types(); +} \ No newline at end of file diff --git a/ngraph/test/constant_folding.cpp b/ngraph/test/constant_folding.cpp index 8e8b3cb0e4073c..d7c26acf9f3cd7 100644 --- a/ngraph/test/constant_folding.cpp +++ b/ngraph/test/constant_folding.cpp @@ -1996,8 +1996,6 @@ TEST(constant_folding, constant_dyn_reshape_shape_not_originally_constant) dyn_reshape->set_friendly_name("test"); auto f = make_shared(dyn_reshape, ParameterVector{}); - ASSERT_TRUE(dyn_reshape->get_output_partial_shape(0).is_dynamic()); - pass::Manager pass_manager; pass_manager.register_pass(); pass_manager.run_passes(f); diff --git a/ngraph/test/models/onnx/mvn_v6.prototxt b/ngraph/test/models/onnx/mvn_v6.prototxt new file mode 100644 index 00000000000000..8dc05acc8d4910 --- /dev/null +++ b/ngraph/test/models/onnx/mvn_v6.prototxt @@ -0,0 +1,57 @@ +ir_version: 4 +producer_name: "backend-test" +graph { + node { + input: "X" + output: "Y" + op_type: "MeanVarianceNormalization" + } + name: "test_mvn" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 10 +} diff --git a/ngraph/test/models/onnx/tensor_names.prototxt b/ngraph/test/models/onnx/tensor_names.prototxt new file mode 100644 index 00000000000000..184674b3bf04d1 --- /dev/null +++ b/ngraph/test/models/onnx/tensor_names.prototxt @@ -0,0 +1,58 @@ +ir_version: 7 +producer_name: "test_model" +graph { + node { + input: "input" + output: "relu_t" + op_type: "Relu" + name: "relu" + } + node { + input: "relu_t" + output: "final_output" + name: "ident" + op_type: "Identity" + } + name: "test_model" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 50 + } + dim { + dim_value: 50 + } + } + } + } + } + output { + name: "final_output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 50 + } + dim { + dim_value: 50 + } + } + } + } + } +} +opset_import { + version: 13 +} diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index 
d832a127b9bd3b..475a1bba3328eb 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -116,8 +116,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_node_names_check) [](std::shared_ptr op) { return std::string(op->get_type_name()) == "Add"; }); EXPECT_EQ(additions.size(), 2); - EXPECT_EQ(additions.at(0)->get_friendly_name(), "X"); - EXPECT_EQ(additions.at(1)->get_friendly_name(), "Y"); + EXPECT_EQ(additions.at(0)->get_friendly_name(), "add_node1"); + EXPECT_EQ(additions.at(0)->get_output_tensor(0).get_names(), + std::unordered_set{"X"}); + EXPECT_EQ(additions.at(1)->get_friendly_name(), "add_node2"); + EXPECT_EQ(additions.at(1)->get_output_tensor(0).get_names(), + std::unordered_set{"Y"}); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_abc) @@ -3946,3 +3950,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_inbounds) test_case.add_expected_output(Shape{data.size()}, data); test_case.run(); } + +NGRAPH_TEST(${BACKEND_NAME}, onnx_mvn_v6) +{ + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/mvn_v6.prototxt")); + + auto test_case = test::TestCase(function); + test_case.add_input( + {0.8439683, 0.5665144, 0.05836735, 0.02916367, 0.12964272, 0.5060197, 0.79538304, + 0.9411346, 0.9546573, 0.17730942, 0.46192095, 0.26480448, 0.6746842, 0.01665257, + 0.62473077, 0.9240844, 0.9722341, 0.11965699, 0.41356155, 0.9129373, 0.59330076, + 0.81929934, 0.7862604, 0.11799799, 0.69248444, 0.54119414, 0.07513223}); + test_case.add_expected_output( + Shape{3, 3, 3, 1}, + {1.3546423, 0.33053496, -1.5450814, -1.2106764, -0.8925952, 0.29888135, 0.38083088, + 0.81808794, 0.85865635, -1.1060555, -0.05552877, -0.78310335, 0.83281356, -1.250282, + 0.67467856, 0.7669372, 0.9113869, -1.6463585, -0.23402764, 1.6092131, 0.42940593, + 1.2906139, 1.1860244, -0.92945826, 0.0721334, -0.38174, -1.7799333}); + test_case.run(); +} diff --git a/ngraph/test/onnx/onnx_import_controlflow.in.cpp b/ngraph/test/onnx/onnx_import_controlflow.in.cpp index b2b5a403288d0c..4235501b5d3c04 100644 --- a/ngraph/test/onnx/onnx_import_controlflow.in.cpp +++ b/ngraph/test/onnx/onnx_import_controlflow.in.cpp @@ -335,12 +335,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_controlflow_loop_the_proper_opset_in_subgraph) return std::string{op->get_type_name()} == "Loop"; }); const auto body_ops = - ngraph::as_type_ptr(*loop_node_it)->get_function()->get_ops(); + ngraph::as_type_ptr(*loop_node_it)->get_function()->get_ops(); const auto body_mul_node_it = std::find_if(body_ops.begin(), body_ops.end(), [](const std::shared_ptr& op) { return std::string{op->get_type_name()} == "Multiply"; }); - const auto body_mul_node = ngraph::as_type_ptr(*body_mul_node_it); + const auto body_mul_node = ngraph::as_type_ptr(*body_mul_node_it); EXPECT_TRUE(body_mul_node); EXPECT_EQ( body_mul_node->get_autob().m_type, diff --git a/ngraph/test/onnx/onnx_tensor_names.cpp b/ngraph/test/onnx/onnx_tensor_names.cpp new file mode 100644 index 00000000000000..6b31a42e7954af --- /dev/null +++ b/ngraph/test/onnx/onnx_tensor_names.cpp @@ -0,0 +1,80 @@ +//***************************************************************************** +// Copyright 2017-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "onnx_import/onnx.hpp" +#include "onnx_import/onnx_utils.hpp" +#include "util/test_case.hpp" +#include "util/test_control.hpp" + +NGRAPH_SUPPRESS_DEPRECATED_START + +using namespace ngraph; + +static std::string s_manifest = "${MANIFEST}"; + +using Inputs = std::vector>; +using Outputs = std::vector>; + +NGRAPH_TEST(onnx_tensor_names, simple_model) +{ + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/tensor_names.prototxt")); + + auto ops = function->get_ordered_ops(); + ASSERT_EQ(ops[0]->get_friendly_name(), "input"); + ASSERT_EQ(ops[0]->get_output_tensor(0).get_names(), std::unordered_set{"input"}); + ASSERT_EQ(ops[1]->get_friendly_name(), "relu"); + ASSERT_EQ(ops[1]->get_output_tensor(0).get_names(), std::unordered_set{"relu_t"}); + // ops[2] is a constant created in the ONNX importer as part of Identity operator + ASSERT_EQ(ops[3]->get_friendly_name(), "ident"); + ASSERT_EQ(ops[3]->get_output_tensor(0).get_names(), + std::unordered_set{"final_output"}); + ASSERT_EQ(ops[4]->get_friendly_name(), "final_output"); + + ASSERT_EQ(function->get_result()->get_input_tensor(0).get_names(), + std::unordered_set{"final_output"}); + ASSERT_EQ(function->get_result()->input_value(0).get_tensor().get_names(), + std::unordered_set{"final_output"}); +} + +NGRAPH_TEST(onnx_tensor_names, node_multiple_outputs) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/top_k.prototxt")); + + auto ops = function->get_ordered_ops(); + + ASSERT_EQ(ops[0]->get_friendly_name(), "x"); + ASSERT_EQ(ops[0]->get_output_tensor(0).get_names(), std::unordered_set{"x"}); + // ops[1] is a constant created in the ONNX importer as part of TopK operator(K value) + ASSERT_EQ(ops[2]->get_friendly_name(), "indices"); + ASSERT_EQ(ops[2]->get_output_tensor(0).get_names(), std::unordered_set{"values"}); + ASSERT_EQ(ops[2]->get_output_tensor(1).get_names(), std::unordered_set{"indices"}); + // result nodes are generated in different order than function results. 
+ ASSERT_EQ(ops[3]->get_friendly_name(), "indices"); + ASSERT_EQ(ops[4]->get_friendly_name(), "values"); + + ASSERT_EQ(function->get_results()[0]->get_input_tensor(0).get_names(), + std::unordered_set{"values"}); + ASSERT_EQ(function->get_results()[1]->get_input_tensor(0).get_names(), + std::unordered_set{"indices"}); + ASSERT_EQ(function->get_results()[0]->input_value(0).get_tensor().get_names(), + std::unordered_set{"values"}); + ASSERT_EQ(function->get_results()[1]->input_value(0).get_tensor().get_names(), + std::unordered_set{"indices"}); +} diff --git a/ngraph/test/pattern.cpp b/ngraph/test/pattern.cpp index 5d3772069a7ea3..733078815274a8 100644 --- a/ngraph/test/pattern.cpp +++ b/ngraph/test/pattern.cpp @@ -810,7 +810,7 @@ TEST(pattern, is_contained_match) ASSERT_FALSE(n.is_contained_match()); } -TEST(pattern, wrap_type) +TEST(pattern, wrap_type_single_op) { auto a = make_shared(element::f32, Shape{1, 3, 64, 64}); auto b = make_shared(a); @@ -852,3 +852,47 @@ TEST(pattern, wrap_type) ASSERT_TRUE(matcher->match(static_pointer_cast(mul2))); } } + +TEST(pattern, wrap_type_multi_op) +{ + auto a = make_shared(element::f32, Shape{1, 3, 64, 64}); + auto b = make_shared(a); + auto c = make_shared(a); + auto mul = make_shared(a, op::Constant::create(element::f32, Shape{}, {1})); + auto add = make_shared(op::Constant::create(element::f32, Shape{}, {1}), a); + + { + auto m = pattern::wrap_type(); + auto matcher = std::make_shared(m, "MulAddMatcher"); + ASSERT_TRUE(matcher->match(mul->output(0))); + ASSERT_EQ(matcher->get_matched_nodes().size(), 1); + ASSERT_EQ(matcher->get_matched_nodes()[0], mul); + ASSERT_EQ(matcher->get_pattern_map().count(m), 1); + + ASSERT_TRUE(matcher->match(add->output(0))); + ASSERT_EQ(matcher->get_matched_nodes().size(), 1); + ASSERT_EQ(matcher->get_matched_nodes()[0], add); + ASSERT_EQ(matcher->get_pattern_map().count(m), 1); + + ASSERT_FALSE(matcher->match(static_pointer_cast(a))); + ASSERT_FALSE(matcher->match(static_pointer_cast(b))); + ASSERT_FALSE(matcher->match(static_pointer_cast(c))); + } + { + auto m = pattern::wrap_type(); + auto matcher = std::make_shared(m, "ElementwiseMatcher"); + ASSERT_TRUE(matcher->match(mul->output(0))); + ASSERT_EQ(matcher->get_matched_nodes().size(), 1); + ASSERT_EQ(matcher->get_matched_nodes()[0], mul); + ASSERT_EQ(matcher->get_pattern_map().count(m), 1); + + ASSERT_TRUE(matcher->match(add->output(0))); + ASSERT_EQ(matcher->get_matched_nodes().size(), 1); + ASSERT_EQ(matcher->get_matched_nodes()[0], add); + ASSERT_EQ(matcher->get_pattern_map().count(m), 1); + + ASSERT_FALSE(matcher->match(static_pointer_cast(a))); + ASSERT_FALSE(matcher->match(static_pointer_cast(b))); + ASSERT_FALSE(matcher->match(static_pointer_cast(c))); + } +} diff --git a/ngraph/test/runtime/dynamic/dynamic_backend.cpp b/ngraph/test/runtime/dynamic/dynamic_backend.cpp index aec5dcb1a96fdb..d91f82091225ef 100644 --- a/ngraph/test/runtime/dynamic/dynamic_backend.cpp +++ b/ngraph/test/runtime/dynamic/dynamic_backend.cpp @@ -279,9 +279,7 @@ bool runtime::dynamic::DynamicExecutable::call( num_dyn_nodes_last_pass = num_dyn_nodes_this_pass; } - pass::Manager pass_val; - pass_val.register_pass(); - pass_val.run_passes(clone); + clone->validate_nodes_and_infer_types(); std::vector> wrapped_outputs; diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index adffca1acf21d6..fe1b362af2d91e 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -1176,9 +1176,6 @@ 
IE_CPU.onnx_model_nonmaxsuppression_center_point_box_format IE_CPU.onnx_model_nonmaxsuppression_single_box IE_CPU.nonmaxsuppression_suppress_by_IOU_and_scores_without_constants -# Bug in CPU plugin for ROIPooling when pooled size is 1x1 and method is bilinear -IE_CPU.roi_pooling_1x1_bilinear - # Unsupported dynamic op IE_CPU.range_v4_trunc_inputs IE_CPU.onnx_model_reduce_sum_13_axes_as_input @@ -1562,25 +1559,14 @@ onnx_controlflow_loop_infinite onnx_dyn_shapes_reduce_max_dynamic_input_rank_negative_axis IE_GPU.range_v4_trunc_inputs -# not implemented yet on CPU and GPU plugins -IE_CPU.evaluate_1D_gather_elements_3_indices_int32 -IE_CPU.evaluate_2D_gather_elements_2x2_indices_int32_axis_0 -IE_CPU.evaluate_2D_gather_elements_2x2_indices_int32_axis_1 -IE_CPU.evaluate_2D_gather_elements_2x2_indices_int32_axis_minus_1 -IE_CPU.evaluate_2D_gather_elements_2x3_indices_int32 -IE_CPU.evaluate_3D_gather_elements_3x2x2_indices_int32 -IE_CPU.evaluate_3D_gather_elements_3x2x2_indices_int64 +# CPU plugin does not support bool type IE_CPU.evaluate_2D_gather_elements_3x2_data_bool -IE_CPU.evaluate_2D_gather_elements_2x3_data_float32 + +# CPU plugin does not validate whether indices values are out of range IE_CPU.evaluate_1D_gather_elements_negative_test IE_CPU.evaluate_2D_gather_elements_negative_test -IE_CPU.evaluate_2D_gather_elements_2x2x1_data_float32 -IE_CPU.evaluate_4D_gather_elements_3x2x2x2_indices_int64 -IE_CPU.onnx_model_gather_elements_float_1D -IE_CPU.onnx_model_gather_elements_float_negative_axis -IE_CPU.onnx_model_gather_elements_int32_axis_0 -IE_CPU.onnx_model_gather_elements_int8_axis_1 -IE_CPU.onnx_model_gather_elements_float_3D_axis_2 + +# not implemented yet on GPU plugin IE_GPU.evaluate_1D_gather_elements_3_indices_int32 IE_GPU.evaluate_2D_gather_elements_2x2_indices_int32_axis_0 IE_GPU.evaluate_2D_gather_elements_2x2_indices_int32_axis_1 @@ -1621,3 +1607,4 @@ evaluate_mvn_6 evaluate_mvn_6_inside_sqrt evaluate_mvn_6_across_chanells evaluate_mvn_6_across_batch +IE_CPU.onnx_mvn_v6 diff --git a/ngraph/test/runtime/interpreter/evaluates_map.cpp b/ngraph/test/runtime/interpreter/evaluates_map.cpp index 4f8802e79e69fd..3f709c5cb9a609 100644 --- a/ngraph/test/runtime/interpreter/evaluates_map.cpp +++ b/ngraph/test/runtime/interpreter/evaluates_map.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -196,8 +198,6 @@ namespace const auto& out_shape = outputs[0]->get_shape(); const auto& in_shape = inputs[0]->get_shape(); const auto& filter_shape = inputs[1]->get_shape(); - Strides in_dilation(std::vector(in_shape.size() - 2)); - std::fill(in_dilation.begin(), in_dilation.end(), 1); runtime::reference::convolution::value_type>( in_data_ptr, filter_data, @@ -208,8 +208,7 @@ namespace op->get_strides(), op->get_dilations(), op->get_pads_begin(), - op->get_pads_end(), - in_dilation); + op->get_pads_end()); return true; } @@ -241,6 +240,30 @@ namespace return true; } + template + bool evaluate(const shared_ptr& op, + const HostTensorVector& outputs, + const HostTensorVector& inputs) + { + const auto filter_data = inputs[1]->get_data_ptr(); + auto out_data_ptr = outputs[0]->get_data_ptr(); + const auto in_data_ptr = inputs[0]->get_data_ptr(); + const auto& out_shape = outputs[0]->get_shape(); + const auto& in_shape = inputs[0]->get_shape(); + const auto& filter_shape = inputs[1]->get_shape(); + runtime::reference::group_convolution::value_type>( + in_data_ptr, + filter_data, + 
out_data_ptr, + in_shape, + filter_shape, + out_shape, + op->get_strides(), + op->get_dilations(), + op->get_pads_begin(), + op->get_pads_end()); + return true; + } namespace cum_sum_v0 { template diff --git a/ngraph/test/runtime/interpreter/int_executable.cpp b/ngraph/test/runtime/interpreter/int_executable.cpp index 405d839cc62cf6..0b61b9946a5266 100644 --- a/ngraph/test/runtime/interpreter/int_executable.cpp +++ b/ngraph/test/runtime/interpreter/int_executable.cpp @@ -72,38 +72,6 @@ runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr& f auto concat = std::make_shared(convs, 1); replace_node(node, concat); } - else if (is_type(node)) - { - auto gr_conv = dynamic_pointer_cast(node); - auto num_groups = gr_conv->input_value(1).get_shape()[0]; - auto split_filter_axis = std::make_shared( - ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{0}); - auto sliced_filter = std::make_shared( - gr_conv->input_value(1), split_filter_axis, num_groups); - auto split_data_axis = std::make_shared( - ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{1}); - auto sliced_data = std::make_shared( - gr_conv->input_value(0), split_data_axis, num_groups); - - NodeVector convs; - auto squeeze_filter_axis = std::make_shared( - ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{0}); - for (size_t i = 0; i < num_groups; ++i) - { - auto squeezed_filter = std::make_shared(sliced_filter->output(i), - squeeze_filter_axis); - auto conv = std::make_shared(sliced_data->output(i), - squeezed_filter, - gr_conv->get_strides(), - gr_conv->get_pads_begin(), - gr_conv->get_pads_end(), - gr_conv->get_dilations(), - gr_conv->get_auto_pad()); - convs.push_back(conv); - } - auto concat = std::make_shared(convs, 1); - replace_node(node, concat); - } } for (auto node : m_function->get_ordered_ops()) { diff --git a/ngraph/test/runtime/interpreter/opset_int_tbl.hpp b/ngraph/test/runtime/interpreter/opset_int_tbl.hpp index 04d5a36af2fc68..93f473eef8abf9 100644 --- a/ngraph/test/runtime/interpreter/opset_int_tbl.hpp +++ b/ngraph/test/runtime/interpreter/opset_int_tbl.hpp @@ -51,6 +51,7 @@ NGRAPH_OP(AvgPool, op::v1) NGRAPH_OP(ConvertLike, op::v1) NGRAPH_OP(Convolution, ngraph::op::v1) NGRAPH_OP(ConvolutionBackpropData, ngraph::op::v1) +NGRAPH_OP(GroupConvolution, ngraph::op::v1) NGRAPH_OP(LessEqual, op::v1) NGRAPH_OP(LogicalAnd, op::v1) NGRAPH_OP(LogicalOr, op::v1) diff --git a/ngraph/test/tensor.cpp b/ngraph/test/tensor.cpp index 216831dc0de8ce..be9cc26ab1f180 100644 --- a/ngraph/test/tensor.cpp +++ b/ngraph/test/tensor.cpp @@ -23,6 +23,7 @@ #include "gtest/gtest.h" #include "ngraph/function.hpp" #include "ngraph/ngraph.hpp" +#include "ngraph/opsets/opset6.hpp" #include "ngraph/pass/manager.hpp" #include "pass/liveness.hpp" #include "util/test_tools.hpp" @@ -91,3 +92,23 @@ TEST(tensor, output_flag) EXPECT_TRUE(op::is_output(f0->get_output_op(i))); } } + +TEST(tensor, tensor_names) +{ + auto arg0 = make_shared(element::f32, Shape{1}); + arg0->set_friendly_name("data"); + arg0->get_output_tensor(0).set_names({"input"}); + + auto relu = make_shared(arg0); + relu->set_friendly_name("relu"); + relu->get_output_tensor(0).set_names({"relu_t", "identity"}); + auto f0 = make_shared(relu, ParameterVector{arg0}); + + ASSERT_EQ(arg0->get_output_tensor(0).get_names(), relu->get_input_tensor(0).get_names()); + ASSERT_EQ(arg0->get_output_tensor(0).get_names(), + relu->input_value(0).get_tensor().get_names()); + ASSERT_EQ(f0->get_result()->get_input_tensor(0).get_names(), + 
relu->get_output_tensor(0).get_names()); + ASSERT_EQ(f0->get_result()->input_value(0).get_tensor().get_names(), + relu->get_output_tensor(0).get_names()); +} diff --git a/ngraph/test/type_prop/broadcast.cpp b/ngraph/test/type_prop/broadcast.cpp index 75c949127a1dc7..de5a784ffa26ef 100644 --- a/ngraph/test/type_prop/broadcast.cpp +++ b/ngraph/test/type_prop/broadcast.cpp @@ -90,8 +90,7 @@ TYPED_TEST_P(BroadcastTests, broadcast_target_shape_as_concat_with_node) ASSERT_TRUE(bc->get_output_partial_shape(0).rank().is_static()); ASSERT_TRUE(bc->get_output_partial_shape(0).rank().same_scheme(Rank{4})); ASSERT_TRUE(bc->get_output_partial_shape(0).is_dynamic()); - ASSERT_TRUE(bc->get_output_partial_shape(0).same_scheme( - PartialShape{Dimension::dynamic(), 16, 50, 50})); + ASSERT_EQ(bc->get_output_partial_shape(0), PartialShape({Dimension::dynamic(), 16, 50, 50})); } TYPED_TEST_P(BroadcastTests, broadcast_fail_rank) diff --git a/ngraph/test/type_prop/experimental_detectron_prior_grid_generator.cpp b/ngraph/test/type_prop/experimental_detectron_prior_grid_generator.cpp index e8249658511d13..863b50a1e9ff3c 100644 --- a/ngraph/test/type_prop/experimental_detectron_prior_grid_generator.cpp +++ b/ngraph/test/type_prop/experimental_detectron_prior_grid_generator.cpp @@ -113,8 +113,23 @@ TEST(type_prop, detectron_grid_generator_dynamic_shapes) } } -TEST(type_prop, detectron_grid_generator_dynamic_shapes_intervals) +struct GridGeneratorIntervalsTestParams { + PartialShape priors_shape; + PartialShape feature_map_shape; + PartialShape im_data_shape; + PartialShape ref_out_shape; + bool flatten; +}; + +struct GridGeneratorIntervalsTest : ::testing::TestWithParam +{ +}; + +TEST_P(GridGeneratorIntervalsTest, detectron_grid_generator_dynamic_shapes_intervals_2) +{ + auto params = GetParam(); + Attrs attrs; attrs.flatten = false; attrs.h = 0; @@ -122,57 +137,144 @@ TEST(type_prop, detectron_grid_generator_dynamic_shapes_intervals) attrs.stride_x = 4.0f; attrs.stride_y = 4.0f; - struct ShapesAndAttrs - { - PartialShape priors_shape; - PartialShape feature_map_shape; - PartialShape ref_out_shape; - bool flatten; - }; - - const Shape im_data_shape = Shape{1, 3, 800, 1344}; - - std::vector shapes = { - {{3, 4}, {1, 256, 200, Dimension(0, 100)}, {Dimension(0, 60000), 4}, true}, - {{3, 4}, {1, 256, Dimension(0, 150), 336}, {Dimension(0, 151200), 4}, true}, - {{3, 4}, {1, 256, Dimension(0, 150), Dimension(0, 100)}, {Dimension(0, 45000), 4}, true}, - {{Dimension(0, 3), 4}, {1, 256, 200, Dimension(0, 150)}, {Dimension(0, 90000), 4}, true}, - {{Dimension(0, 3), 4}, {1, 256, Dimension(0, 150), 336}, {Dimension(0, 151200), 4}, true}, - {{Dimension(0, 3), 4}, - {1, 256, Dimension(0, 150), Dimension(0, 100)}, - {Dimension(0, 45000), 4}, - true}, - {{3, 4}, {1, 256, 200, Dimension(0, 100)}, {200, Dimension(0, 100), 3, 4}, false}, - {{3, 4}, {1, 256, Dimension(0, 150), 336}, {Dimension(0, 150), 336, 3, 4}, false}, - {{3, 4}, - {1, 256, Dimension(0, 150), Dimension(0, 100)}, - {Dimension(0, 150), Dimension(0, 100), 3, 4}, - false}, - {{Dimension(0, 3), 4}, - {1, 256, 200, Dimension(0, 100)}, - {200, Dimension(0, 100), Dimension(0, 3), 4}, - false}, - {{Dimension(0, 3), 4}, - {1, 256, Dimension(0, 150), 336}, - {Dimension(0, 150), 336, Dimension(0, 3), 4}, - false}, - {{Dimension(0, 3), 4}, - {1, 256, Dimension(0, 150), Dimension(0, 100)}, - {Dimension(0, 150), Dimension(0, 100), Dimension(0, 3), 4}, - false}}; + auto grid_attrs = attrs; + grid_attrs.flatten = params.flatten; - for (const auto& s : shapes) - { - auto grid_attrs = 
attrs; - grid_attrs.flatten = s.flatten; + auto priors = std::make_shared(element::f32, params.priors_shape); + auto feature_map = std::make_shared(element::f32, params.feature_map_shape); + auto im_data = std::make_shared(element::f32, params.im_data_shape); - auto priors = std::make_shared(element::f32, s.priors_shape); - auto feature_map = std::make_shared(element::f32, s.feature_map_shape); - auto im_data = std::make_shared(element::f32, im_data_shape); + auto grid_gen = std::make_shared(priors, feature_map, im_data, grid_attrs); - auto grid_gen = std::make_shared(priors, feature_map, im_data, grid_attrs); - - ASSERT_EQ(grid_gen->get_output_element_type(0), element::f32); - ASSERT_TRUE(grid_gen->get_output_partial_shape(0).same_scheme(s.ref_out_shape)); - } + ASSERT_EQ(grid_gen->get_output_element_type(0), element::f32); + ASSERT_TRUE(grid_gen->get_output_partial_shape(0).same_scheme(params.ref_out_shape)); } + +INSTANTIATE_TEST_CASE_P( + type_prop, + GridGeneratorIntervalsTest, + ::testing::Values( + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, 200, Dimension(0, 100)}, + {Dimension(0, 5), 3, 800, 1344}, + {Dimension(0, 60000), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {Dimension(0, 7), 256, Dimension(0, 150), 336}, + {Dimension(0, 5), 3, 800, 1344}, + {Dimension(0, 151200), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, Dimension(0, 150), Dimension(0, 100)}, + {Dimension(0, 11), 3, 800, 1344}, + {Dimension(0, 45000), 4}, + true}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, 200, Dimension(0, 150)}, + {Dimension(0, 5), 3, 800, 1344}, + {Dimension(0, 90000), 4}, + true}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {Dimension(0, 77), 256, Dimension(0, 150), 336}, + {Dimension(0, 54), 3, 800, 1344}, + {Dimension(0, 151200), 4}, + true}, + GridGeneratorIntervalsTestParams{ + {Dimension(0, 3), 4}, + {Dimension(0, 3), 256, Dimension(0, 150), Dimension(0, 100)}, + {Dimension(0, 54), 3, 800, 1344}, + {Dimension(0, 45000), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, 200, Dimension(0, 100)}, + {Dimension(0, 6), 3, 800, 1344}, + {200, Dimension(0, 100), 3, 4}, + false}, + GridGeneratorIntervalsTestParams{{3, 4}, + {Dimension(0, 9), 256, Dimension(0, 150), 336}, + {Dimension(0, 4), 3, 800, 1344}, + {Dimension(0, 150), 336, 3, 4}, + false}, + GridGeneratorIntervalsTestParams{ + {3, 4}, + {Dimension(1, 3), 256, Dimension(0, 150), Dimension(0, 100)}, + {Dimension(0, 4), 3, 800, 1344}, + {Dimension(0, 150), Dimension(0, 100), 3, 4}, + false}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {Dimension(5, 11), 256, 200, Dimension(0, 100)}, + {Dimension(0, 17), 3, 800, 1344}, + {200, Dimension(0, 100), Dimension(0, 3), 4}, + false}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {Dimension(7, 9), 256, Dimension(0, 150), 336}, + {Dimension(4, 18), 3, 800, 1344}, + {Dimension(0, 150), 336, Dimension(0, 3), 4}, + false}, + GridGeneratorIntervalsTestParams{ + {Dimension(0, 3), 4}, + {Dimension(0, 8), 256, Dimension(0, 150), Dimension(0, 100)}, + {Dimension(4, 18), 3, 800, 1344}, + {Dimension(0, 150), Dimension(0, 100), Dimension(0, 3), 4}, + false}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, 200, Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 60000), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, Dimension(0, 150), 336}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 151200), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, 
Dimension(0, 150), Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 45000), 4}, + true}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, 200, Dimension(0, 150)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 90000), 4}, + true}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, Dimension(0, 150), 336}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 151200), 4}, + true}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, Dimension(0, 150), Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 45000), 4}, + true}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, 200, Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {200, Dimension(0, 100), 3, 4}, + false}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, Dimension(0, 150), 336}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 150), 336, 3, 4}, + false}, + GridGeneratorIntervalsTestParams{{3, 4}, + {1, 256, Dimension(0, 150), Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 150), Dimension(0, 100), 3, 4}, + false}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, 200, Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {200, Dimension(0, 100), Dimension(0, 3), 4}, + false}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, Dimension(0, 150), 336}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 150), 336, Dimension(0, 3), 4}, + false}, + GridGeneratorIntervalsTestParams{{Dimension(0, 3), 4}, + {1, 256, Dimension(0, 150), Dimension(0, 100)}, + Shape{1, 3, 800, 1344}, + {Dimension(0, 150), Dimension(0, 100), Dimension(0, 3), 4}, + false}), + PrintToDummyParamName()); diff --git a/ngraph/test/type_prop/experimental_detectron_roi_feature_extractor.cpp b/ngraph/test/type_prop/experimental_detectron_roi_feature_extractor.cpp index 4bc3f1a34fcc67..b6942a585feddd 100644 --- a/ngraph/test/type_prop/experimental_detectron_roi_feature_extractor.cpp +++ b/ngraph/test/type_prop/experimental_detectron_roi_feature_extractor.cpp @@ -88,84 +88,338 @@ TEST(type_prop, detectron_roi_feature_extractor_dynamic) } } -TEST(type_prop, detectron_roi_feature_extractor_intervals) +struct ROIFeatureIntervalsTestParams { + PartialShape input_shape; + Dimension channels[4]; + Dimension first_dims[4]; +}; + +struct ROIFeatureIntervalsTest : ::testing::TestWithParam +{ +}; + +TEST_P(ROIFeatureIntervalsTest, detectron_roi_feature_extractor_intervals_1) +{ + auto params = GetParam(); + Attrs attrs; attrs.aligned = false; attrs.output_size = 14; attrs.sampling_ratio = 2; attrs.pyramid_scales = {4, 8, 16, 32}; - struct Shapes - { - PartialShape input_shape; - Dimension channels[4]; - }; + auto layer0_channels = params.channels[0]; + auto layer1_channels = params.channels[1]; + auto layer2_channels = params.channels[2]; + auto layer3_channels = params.channels[3]; - const auto dyn_dim = Dimension::dynamic(); + auto layer0_shape = PartialShape{params.first_dims[0], layer0_channels, 200, 336}; + auto layer1_shape = PartialShape{params.first_dims[1], layer1_channels, 100, 168}; + auto layer2_shape = PartialShape{params.first_dims[2], layer2_channels, 50, 84}; + auto layer3_shape = PartialShape{params.first_dims[3], layer3_channels, 25, 42}; - std::vector shapes = { - {{1000, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}}, - {{1000, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}}, - {{1000, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}}, - {{1000, 4}, {Dimension(0, 128), Dimension(0, 
256), Dimension(64), Dimension(64)}}, - {{1000, 4}, {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, - {{1000, 4}, {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}}, - {{1000, 4}, {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}}, - {{1000, 4}, {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}}, - {{1000, 4}, {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}}, - {{1000, 4}, {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}}, - {{1000, 4}, {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}}, - {{1000, 4}, {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}}, - {{1000, 4}, {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, - {{1000, 4}, {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}}, - {{1000, 4}, {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}}, - {{1000, 4}, {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}}, - {{dyn_dim, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}}, - {{dyn_dim, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}}, - {{dyn_dim, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}}, - {{dyn_dim, 4}, {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(64)}}, - {{dyn_dim, 4}, {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, - {{dyn_dim, 4}, {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}}, - {{dyn_dim, 4}, {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}}, - {{dyn_dim, 4}, {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}}, - {{dyn_dim, 4}, {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}}, - {{dyn_dim, 4}, {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}}, - {{dyn_dim, 4}, {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}}, - {{dyn_dim, 4}, {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}}, - {{dyn_dim, 4}, {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, - {{dyn_dim, 4}, {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}}, - {{dyn_dim, 4}, {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}}, - {{dyn_dim, 4}, {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}}}; + auto expected_channels = layer0_channels & layer1_channels & layer2_channels & layer3_channels; - for (const auto& s : shapes) - { - auto layer0_channels = s.channels[0]; - auto layer1_channels = s.channels[1]; - auto layer2_channels = s.channels[2]; - auto layer3_channels = s.channels[3]; + auto ref_out_shape = PartialShape{params.input_shape[0], expected_channels, 14, 14}; - auto layer0_shape = PartialShape{1, layer0_channels, 200, 336}; - auto layer1_shape = PartialShape{1, layer1_channels, 100, 168}; - auto layer2_shape = PartialShape{1, layer2_channels, 50, 84}; - auto layer3_shape = PartialShape{1, layer3_channels, 25, 42}; + auto input = std::make_shared(element::f32, params.input_shape); + auto pyramid_layer0 = std::make_shared(element::f32, layer0_shape); + auto pyramid_layer1 = std::make_shared(element::f32, layer1_shape); + auto pyramid_layer2 = std::make_shared(element::f32, layer2_shape); + auto pyramid_layer3 = std::make_shared(element::f32, layer3_shape); - auto expected_channels = - layer0_channels & layer1_channels & 
layer2_channels & layer3_channels; + auto roi = std::make_shared( + NodeVector{input, pyramid_layer0, pyramid_layer1, pyramid_layer2, pyramid_layer3}, attrs); - auto ref_out_shape = PartialShape{s.input_shape[0], expected_channels, 14, 14}; + ASSERT_EQ(roi->get_output_element_type(0), element::f32); + ASSERT_TRUE(roi->get_output_partial_shape(0).same_scheme(ref_out_shape)); +} - auto input = std::make_shared(element::f32, s.input_shape); - auto pyramid_layer0 = std::make_shared(element::f32, layer0_shape); - auto pyramid_layer1 = std::make_shared(element::f32, layer1_shape); - auto pyramid_layer2 = std::make_shared(element::f32, layer2_shape); - auto pyramid_layer3 = std::make_shared(element::f32, layer3_shape); +INSTANTIATE_TEST_CASE_P( + type_prop, + ROIFeatureIntervalsTest, + ::testing::Values( + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 5)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 5)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 5)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 5)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(64)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 5)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 5)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 4)}, + {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 4)}, + {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(3, 4)}, + {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(3, 4)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 6)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(0, 6)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(3, 7)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, 
Dimension(4, 6)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(4, 6)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 8)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {1000, Dimension(2, 8)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(0, 4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(0, 4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(1, 4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(1, 4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(64)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(2, 4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(2, 4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(3, 5)}, + {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(3, 5)}, + {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(4, 6)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(4, 6)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(3, 8)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(3, 8)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(4, 11)}, + {Dimension(256), 
Dimension(256), Dimension(0, 640), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(4, 11)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(2, 16)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}, + ROIFeatureIntervalsTestParams{ + {Dimension::dynamic(), Dimension(2, 16)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}, + {Dimension(0, 2), Dimension(1, 3), Dimension(0, 5), Dimension(1, 2)}}), + PrintToDummyParamName()); - auto roi = std::make_shared( - NodeVector{input, pyramid_layer0, pyramid_layer1, pyramid_layer2, pyramid_layer3}, - attrs); +struct ROIFeatureIntervalsSameFirstDimsTestParams +{ + PartialShape input_shape; + Dimension channels[4]; +}; - ASSERT_EQ(roi->get_output_element_type(0), element::f32); - ASSERT_TRUE(roi->get_output_partial_shape(0).same_scheme(ref_out_shape)); - } +struct ROIFeatureIntervalsSameFirstDimsTest + : ::testing::TestWithParam +{ +}; + +TEST_P(ROIFeatureIntervalsSameFirstDimsTest, detectron_roi_feature_extractor_intervals_1) +{ + auto params = GetParam(); + + Attrs attrs; + attrs.aligned = false; + attrs.output_size = 14; + attrs.sampling_ratio = 2; + attrs.pyramid_scales = {4, 8, 16, 32}; + + auto layer0_channels = params.channels[0]; + auto layer1_channels = params.channels[1]; + auto layer2_channels = params.channels[2]; + auto layer3_channels = params.channels[3]; + + auto layer0_shape = PartialShape{1, layer0_channels, 200, 336}; + auto layer1_shape = PartialShape{1, layer1_channels, 100, 168}; + auto layer2_shape = PartialShape{1, layer2_channels, 50, 84}; + auto layer3_shape = PartialShape{1, layer3_channels, 25, 42}; + + auto expected_channels = layer0_channels & layer1_channels & layer2_channels & layer3_channels; + + auto ref_out_shape = PartialShape{params.input_shape[0], expected_channels, 14, 14}; + + auto input = std::make_shared(element::f32, params.input_shape); + auto pyramid_layer0 = std::make_shared(element::f32, layer0_shape); + auto pyramid_layer1 = std::make_shared(element::f32, layer1_shape); + auto pyramid_layer2 = std::make_shared(element::f32, layer2_shape); + auto pyramid_layer3 = std::make_shared(element::f32, layer3_shape); + + auto roi = std::make_shared( + NodeVector{input, pyramid_layer0, pyramid_layer1, pyramid_layer2, pyramid_layer3}, attrs); + + ASSERT_EQ(roi->get_output_element_type(0), element::f32); + ASSERT_TRUE(roi->get_output_partial_shape(0).same_scheme(ref_out_shape)); } + +INSTANTIATE_TEST_CASE_P( + type_prop, + ROIFeatureIntervalsSameFirstDimsTest, + ::testing::Values( + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(64)}}, + 
ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension(1000), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(0, 33)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(0, 64), Dimension(33)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(0, 72)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 128), Dimension(0, 256), Dimension(64), Dimension(64)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(0, 640), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 512), Dimension(256), Dimension(256), Dimension(0, 720)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(0, 380), Dimension(256), Dimension(256), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(0, 640), Dimension(128)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), 
Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(0, 720)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(128), Dimension(0, 256), Dimension(128), Dimension(128)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(0, 640), Dimension(256)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(0, 330)}}, + ROIFeatureIntervalsSameFirstDimsTestParams{ + {Dimension::dynamic(), Dimension(4)}, + {Dimension(256), Dimension(256), Dimension(256), Dimension(256)}}), + PrintToDummyParamName()); diff --git a/ngraph/test/type_prop/loop.cpp b/ngraph/test/type_prop/loop.cpp index ae586ebf43223d..c5d2a1a5c99aed 100644 --- a/ngraph/test/type_prop/loop.cpp +++ b/ngraph/test/type_prop/loop.cpp @@ -108,7 +108,6 @@ TEST(type_prop, loop_operation_for_mode_10_iter_static_shapes) EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -213,7 +212,6 @@ TEST(type_prop, loop_operation_dowhile_mode_1_iter_static_shapes) EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -316,7 +314,6 @@ TEST(type_prop, loop_operation_for_and_condition_mode_dynamic_iter_static_shapes EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); Shape out0_shape{1}; @@ -416,7 +413,6 @@ TEST(type_prop, loop_operation_for_and_condition_mode_dynamic_iter_dynamic_shape EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -524,7 +520,6 @@ TEST(type_prop, loop_operation_for_and_condition_mode_dynamic_iter_partially_dyn EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -586,12 +581,9 @@ TEST(type_prop, loop_operation_for_and_condition_mode_dynamic_iter_incorrect_sli loop->set_merged_input(M_body, M, Zo); const auto sliced_output_axis = 4; - auto out = loop->get_concatenated_slices(Zo, 0, 1, 1, -1, sliced_output_axis); - - auto result = make_shared(out); try { - auto f = make_shared(ResultVector{result}, ParameterVector{X, Y, M}); + auto out = loop->get_concatenated_slices(Zo, 0, 1, 1, -1, sliced_output_axis); FAIL() << "Loop was created with incorrect axis of concatenated slices output."; } catch (const std::exception& error) @@ -690,7 +682,6 @@ TEST(type_prop, loop_operation_infinite_loop_mode_dynamic_iter_dynamic_shapes) EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -796,7 +787,6 @@ TEST(type_prop, loop_operation_for_mode_10_iter_static_shapes_special_body_ports EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -902,7 +892,6 @@ TEST(type_prop, loop_operation_for_mode_10_iter_static_shapes_special_body_ports EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = 
make_shared(out2); @@ -1009,7 +998,6 @@ TEST(type_prop, loop_operation_10_iter_static_shapes_sliced_inputs) EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -1121,7 +1109,6 @@ TEST(type_prop, loop_operation_dynamic_iter_dynamic_batch_shapes_sliced_inputs_c EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); @@ -1236,7 +1223,6 @@ TEST(type_prop, loop_operation_dynamic_iter_dynamic_shapes_sliced_inputs_concate EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); auto result2 = make_shared(out2); diff --git a/ngraph/test/type_prop/one_hot.cpp b/ngraph/test/type_prop/one_hot.cpp index 09886dd18a690b..c55a393afdcf67 100644 --- a/ngraph/test/type_prop/one_hot.cpp +++ b/ngraph/test/type_prop/one_hot.cpp @@ -31,6 +31,11 @@ TEST(type_prop, one_hot_v1_output_shape) auto ont_hot = make_shared(indices, depth, on_value, off_value, axis); ASSERT_EQ(ont_hot->get_element_type(), element::u32); ASSERT_EQ(ont_hot->get_shape(), (Shape{3, 2})); + + auto dyn_indices = make_shared(element::i64, PartialShape{{1, 3}}); + auto dyn_ont_hot = make_shared(dyn_indices, depth, on_value, off_value, axis); + ASSERT_EQ(dyn_ont_hot->get_output_element_type(0), element::u32); + ASSERT_EQ(dyn_ont_hot->get_output_partial_shape(0), (PartialShape{{1, 3}, 2})); } TEST(type_prop, one_hot_v1_output_shape_2) @@ -43,6 +48,11 @@ TEST(type_prop, one_hot_v1_output_shape_2) auto ont_hot = make_shared(indices, depth, on_value, off_value, axis); ASSERT_EQ(ont_hot->get_element_type(), element::f32); ASSERT_EQ(ont_hot->get_shape(), (Shape{1, 3, 2, 4, 3})); + + auto dyn_indices = make_shared(element::i64, PartialShape{1, {3, 5}, 2, 3}); + auto dyn_ont_hot = make_shared(dyn_indices, depth, on_value, off_value, axis); + ASSERT_EQ(dyn_ont_hot->get_output_element_type(0), element::f32); + ASSERT_EQ(dyn_ont_hot->get_output_partial_shape(0), (PartialShape{1, {3, 5}, 2, 4, 3})); } TEST(type_prop, one_hot_v1_indices_elem_not_integral) diff --git a/ngraph/test/type_prop/reshape.cpp b/ngraph/test/type_prop/reshape.cpp index 4bb23c5264469c..ff2cb4b438f814 100644 --- a/ngraph/test/type_prop/reshape.cpp +++ b/ngraph/test/type_prop/reshape.cpp @@ -21,6 +21,180 @@ using namespace std; using namespace ngraph; +TEST(type_prop, static_value_propagation) +{ + auto param = make_shared(element::f32, Shape{1, 2, 3}); + auto shape_of = make_shared(param); + + auto r = make_shared(param, shape_of, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_shape(), (Shape{1, 2, 3})); +} + +TEST(type_prop, interval_value_propagation) +{ + auto param = make_shared(element::f32, PartialShape{Dimension(1, 8), 2, 3}); + auto shape_of = make_shared(param); + + auto r = make_shared(param, shape_of, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape({Dimension(1, 8), 2, 3})); + + auto shape_of_opset1 = make_shared(param); + + auto reshape = make_shared(param, shape_of_opset1, false); + + ASSERT_EQ(reshape->get_element_type(), element::f32); + ASSERT_EQ(reshape->get_output_partial_shape(0), PartialShape({Dimension(1, 8), 2, 3})); +} + +TEST(type_prop, static_value_propagation_through_gather) +{ + auto param = make_shared(element::f32, Shape{1, 2, 3}); + auto shape_of = make_shared(param); + auto gather = make_shared(shape_of, + 
op::Constant::create(element::i64, {3}, {2, 1, 0}), + op::Constant::create(element::i64, {}, {0})); + + auto r = make_shared(param, gather, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_shape(), (Shape{3, 2, 1})); +} + +TEST(type_prop, interval_value_propagation_through_gather) +{ + auto param = make_shared(element::f32, PartialShape{Dimension(1, 8), 2, 3}); + auto shape_of = make_shared(param); + auto gather = make_shared(shape_of, + op::Constant::create(element::i64, {3}, {2, 1, 0}), + op::Constant::create(element::i64, {}, {0})); + + auto r = make_shared(param, gather, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape({3, 2, Dimension(1, 8)})); +} + +TEST(type_prop, interval_value_propagation_through_consecutive_gathers) +{ + auto param = make_shared(element::f32, PartialShape{Dimension(1, 8), 2, 3}); + auto shape_of = make_shared(param); + auto gather_1 = make_shared(shape_of, + op::Constant::create(element::i64, {3}, {2, 1, 0}), + op::Constant::create(element::i64, {}, {0})); + + auto gather_2 = make_shared(gather_1, + op::Constant::create(element::i64, {3}, {1, 2, 0}), + op::Constant::create(element::i64, {}, {0})); + + auto r = make_shared(param, gather_2, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape({2, Dimension(1, 8), 3})); +} + +TEST(type_prop, interval_value_propagation_concatenated_gathers) +{ + auto param = make_shared(element::f32, PartialShape{Dimension(1, 8), 2, 3}); + auto shape_of = make_shared(param); + + auto gather_1 = make_shared(shape_of, + op::Constant::create(element::i64, {}, {2}), + op::Constant::create(element::i64, {}, {0})); + auto dim_1 = make_shared(gather_1, op::Constant::create(element::i64, {1}, {0})); + + auto gather_2 = make_shared(shape_of, + op::Constant::create(element::i64, {}, {1}), + op::Constant::create(element::i64, {}, {0})); + auto tmp_dim_2 = make_shared( + gather_2, op::Constant::create(element::i64, {2}, {1, 1}), true); + auto dim_2 = + make_shared(tmp_dim_2, op::Constant::create(element::i64, {1}, {0})); + + auto gather_3 = make_shared(shape_of, + op::Constant::create(element::i64, {}, {0}), + op::Constant::create(element::i64, {}, {0})); + auto dim_3 = make_shared(gather_3, op::Constant::create(element::i64, {1}, {0})); + + auto shape = make_shared(OutputVector{dim_1, dim_2, dim_3}, 0); + auto r = make_shared(param, shape, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape({3, 2, Dimension(1, 8)})); +} + +TEST(type_prop, interval_value_propagation_mul_div) +{ + auto param = make_shared(element::f32, + PartialShape{Dimension(2, 8), Dimension(4, 16), 2}); + + auto shape_of = make_shared(param); + auto cast_fp = make_shared(shape_of, element::f32); + auto mul = make_shared(cast_fp, + op::Constant::create(element::f32, {3}, {-2, 2, -4})); + auto div = + make_shared(mul, op::Constant::create(element::f32, {3}, {-2, 2, -4})); + auto cast_int = make_shared(div, element::i32); + + auto r = make_shared(param, cast_int, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape({Dimension(2, 8), Dimension(4, 16), 2})); +} + +TEST(type_prop, interval_value_propagation_reduce) +{ + auto param = make_shared(element::f32, PartialShape{Dimension(1, 8), 2, 3}); + auto shape_of = make_shared(param); + auto reduce_prod = make_shared( + shape_of, 
op::Constant::create(element::i64, {1}, {0}), true); + auto r = make_shared(param, reduce_prod, false); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), PartialShape{Dimension(6, 48)}); +} + +TEST(type_prop, interval_value_propagation_reshape_zero_special_value) +{ + auto param = make_shared( + element::f32, PartialShape{Dimension(1, 8), Dimension(16, 64), 3, Dimension(200, 400)}); + auto shape_of = make_shared(param); + + auto dim_021 = make_shared(shape_of, + op::Constant::create(element::i64, {3}, {0, 2, 1}), + op::Constant::create(element::i64, {}, {0})); + auto dim_3 = op::Constant::create(element::i64, {1}, {0}); + + auto shape = make_shared(OutputVector{dim_021, dim_3}, 0); + auto r = make_shared(param, shape, true); + + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), + PartialShape({Dimension(1, 8), 3, Dimension(16, 64), Dimension(200, 400)})); +} + +TEST(type_prop, interval_value_propagation_reshape_zero_minus_one_special_values) +{ + auto param = make_shared( + element::f32, PartialShape{Dimension(1, 8), Dimension(16, 64), 6, Dimension(200, 400)}); + auto shape_of = make_shared(param); + + auto dim_0 = make_shared(shape_of, + op::Constant::create(element::i64, {1}, {1}), + op::Constant::create(element::i64, {}, {0})); + auto dim_1 = op::Constant::create(element::i64, {1}, {0}); + auto dim_2 = op::Constant::create(element::i64, {1}, {-1}); + + auto shape = make_shared(OutputVector{dim_0, dim_1, dim_2}, 0); + auto r = make_shared(param, shape, true); + ASSERT_EQ(r->get_element_type(), element::f32); + ASSERT_EQ(r->get_output_partial_shape(0), + PartialShape({Dimension(16, 64), Dimension(16, 64), Dimension(19, 1200)})); +} + TEST(type_prop, reshape_deduce_s2t) { auto param = make_shared(element::f32, Shape{}); diff --git a/ngraph/test/type_prop/ti.cpp b/ngraph/test/type_prop/ti.cpp index 031d6fa7a328ee..22c0b2deee3567 100644 --- a/ngraph/test/type_prop/ti.cpp +++ b/ngraph/test/type_prop/ti.cpp @@ -106,7 +106,6 @@ TEST(type_prop, tensor_iterator_2_slice_inputs_part_size_2) // Output 1 is concat of Zos // start=0, stride=2, part_size=2, end=39, axis=1 auto out1 = tensor_iterator->get_concatenated_slices(Zo, 0, 2, 2, 39, 1); - auto result0 = make_shared(out0); auto result1 = make_shared(out1); Shape out0_shape{32, 2, 10}; @@ -189,7 +188,6 @@ TEST(type_prop, tensor_iterator_2_slice_inputs_part_size_2_dynamic) EXPECT_NE(output_desc, nullptr); } } - auto result0 = make_shared(out0); auto result1 = make_shared(out1); Shape out0_shape{32, 2, 10}; diff --git a/ngraph/test/type_prop/tile.cpp b/ngraph/test/type_prop/tile.cpp index 4d3871d97765ea..370822b30991c9 100644 --- a/ngraph/test/type_prop/tile.cpp +++ b/ngraph/test/type_prop/tile.cpp @@ -47,3 +47,12 @@ TEST(type_prop, tile_few_repeats) ASSERT_EQ(top->get_element_type(), element::f32); ASSERT_EQ(top->get_shape(), (Shape{6, 32, 10})); } + +TEST(type_prop, tile_few_repeats_dyn_input) +{ + auto param0 = make_shared(element::f32, PartialShape{6, Dimension(8, 10), 10}); + auto param1 = op::Constant::create(element::i64, Shape{2}, {4, 1}); + auto top = make_shared(param0, param1); + ASSERT_EQ(top->get_element_type(), element::f32); + ASSERT_EQ(top->get_output_partial_shape(0), (PartialShape{6, Dimension(32, 40), 10})); +} diff --git a/ngraph/test/type_prop/top_k.cpp b/ngraph/test/type_prop/top_k.cpp index d6c01a09642bcc..f5753adff34401 100644 --- a/ngraph/test/type_prop/top_k.cpp +++ b/ngraph/test/type_prop/top_k.cpp @@ -111,7 +111,7 @@ 
TYPED_TEST_P(topk_type_prop, topk_rank_static_k_unknown) const auto convert_k = make_shared(k, element::i32); const auto topk = make_shared(data, convert_k, axis, "max", "value"); - const PartialShape ranged_dynamic_axis_shape{1, Dimension{5, 10}, 100}; + const PartialShape ranged_dynamic_axis_shape{1, Dimension{5}, 100}; EXPECT_EQ(topk->get_output_partial_shape(0), ranged_dynamic_axis_shape); } } diff --git a/openvino/conditional_compilation/CMakeLists.txt b/openvino/conditional_compilation/CMakeLists.txt index 6fd8b9539ffe0e..dfe2386fa5901f 100644 --- a/openvino/conditional_compilation/CMakeLists.txt +++ b/openvino/conditional_compilation/CMakeLists.txt @@ -26,36 +26,34 @@ target_include_directories(${TARGET_NAME} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/ if(SELECTIVE_BUILD STREQUAL "COLLECT") target_compile_definitions(${TARGET_NAME} INTERFACE SELECTIVE_BUILD_ANALYZER) - include(FetchContent) - FetchContent_Declare( - ext_seapi - GIT_REPOSITORY https://github.com/intel/IntelSEAPI.git - GIT_TAG 7997a782fd3fa5621e275bd31060f9795564e6ca - ) - - FetchContent_GetProperties(ext_seapi) - if(NOT ext_seapi_POPULATED) - FetchContent_Populate(ext_seapi) - add_custom_target(build-seapi - COMMAND pushd ${ext_seapi_SOURCE_DIR} && python ${ext_seapi_SOURCE_DIR}/buildall.py -i && popd - ) - endif() - add_dependencies(${TARGET_NAME} build-seapi) elseif(SELECTIVE_BUILD STREQUAL "ON") if(NOT DEFINED SELECTIVE_BUILD_STAT) message(FATAL_ERROR "In case SELECTIVE_BUILD is enabled, the SELECTIVE_BUILD_STAT variable should contain the path to the collected InelSEAPI statistics.\ Usage: -DSELECTIVE_BUILD=ON -DSELECTIVE_BUILD_STAT=/path/*.csv") endif() + find_package (Python3 COMPONENTS Interpreter) + if (NOT Python3_FOUND) + message(FATAL_ERROR " Python3 wasn't found!") + endif() file(GLOB STAT_FILES ${SELECTIVE_BUILD_STAT}) target_compile_definitions(${TARGET_NAME} INTERFACE SELECTIVE_BUILD) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" + OR CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + # After disabling a block of code, some variables might be unused. 
+ target_compile_options(${TARGET_NAME} INTERFACE + -Wno-unused-function + -Wno-unused-parameter + -Wunused-local-typedefs) + endif() + set(GENERATED_HEADER ${CMAKE_CURRENT_BINARY_DIR}/conditional_compilation_gen.h) set(GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ccheader.py) add_custom_command(OUTPUT ${GENERATED_HEADER} - COMMAND python3 ${GENERATOR} --stat ${SELECTIVE_BUILD_STAT} --out ${GENERATED_HEADER} + COMMAND ${Python3_EXECUTABLE} ${GENERATOR} --stat ${SELECTIVE_BUILD_STAT} --out ${GENERATED_HEADER} DEPENDS ${STAT_FILES}) add_custom_target(conditional_compilation_gen DEPENDS ${GENERATED_HEADER}) add_dependencies(${TARGET_NAME} conditional_compilation_gen) diff --git a/openvino/conditional_compilation/include/openvino/cc/selective_build.h b/openvino/conditional_compilation/include/openvino/cc/selective_build.h index 96e4cb64e6d7e5..7e41d463dcf067 100644 --- a/openvino/conditional_compilation/include/openvino/cc/selective_build.h +++ b/openvino/conditional_compilation/include/openvino/cc/selective_build.h @@ -167,7 +167,7 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs) { const bool is_matched = val == cs.value; if (is_matched) { openvino::itt::ScopedTask task( - openvino::itt::handle( + openvino::itt::handle( std::string(region) + "$" + cs.name)); Fn()(std::forward(ctx)); } diff --git a/openvino/conditional_compilation/scripts/ccheader.py b/openvino/conditional_compilation/scripts/ccheader.py index 63ceede84f192b..f0527af6ebba10 100755 --- a/openvino/conditional_compilation/scripts/ccheader.py +++ b/openvino/conditional_compilation/scripts/ccheader.py @@ -27,6 +27,7 @@ # --out cc.h C++ header file to be generated import argparse, csv +from glob import glob from pathlib import Path from abc import ABC, abstractmethod @@ -119,30 +120,31 @@ def module(self, name): return self.modules.get(name) def read(self, files): - for stat in files: - with open(str(stat)) as f: - reader = csv.reader(f) - rows = list(reader) - if rows: - # Scopes - scopes = list(filter(lambda row: row[0].startswith(Domain[0]), rows)) - for row in scopes: - moduleName = row[0][len(Domain[0]):] - self.module(moduleName).scope(row[1]) - - # Switches - switches = list(map(lambda row: [row[0][len(Domain[1]):]] + row[1].strip().split('$'), - filter(lambda row: row[0].startswith(Domain[1]), rows))) - for switch in switches: - self.module(switch[0]).switch(switch[1]).case(switch[2]) - - # Factories - factories = list(map(lambda row: [row[0][len(Domain[2]):]] + row[1].strip().split('$'), - filter(lambda row: row[0].startswith(Domain[2]), rows))) - for reg in list(filter(lambda row: row[1] == 'REG', factories)): - self.module(reg[0]).factory(reg[2]).register(reg[3], reg[4]) - for cre in list(filter(lambda row: row[1] == 'CREATE', factories)): - self.module(cre[0]).factory(cre[2]).create(cre[3]) + for stats in files: + for stat in glob(str(stats)): + with open(str(stat)) as f: + reader = csv.reader(f) + rows = list(reader) + if rows: + # Scopes + scopes = list(filter(lambda row: len(row) and row[0].startswith(Domain[0]), rows)) + for row in scopes: + moduleName = row[0][len(Domain[0]):] + self.module(moduleName).scope(row[1]) + + # Switches + switches = list(map(lambda row: [row[0][len(Domain[1]):]] + row[1].strip().split('$'), + filter(lambda row: len(row) and row[0].startswith(Domain[1]), rows))) + for switch in switches: + self.module(switch[0]).switch(switch[1]).case(switch[2]) + + # Factories + factories = list(map(lambda row: [row[0][len(Domain[2]):]] + row[1].strip().split('$'), + filter(lambda row: 
len(row) and row[0].startswith(Domain[2]), rows))) + for reg in list(filter(lambda row: len(row) > 1 and row[1] == 'REG', factories)): + self.module(reg[0]).factory(reg[2]).register(reg[3], reg[4]) + for cre in list(filter(lambda row: len(row) > 1 and row[1] == 'CREATE', factories)): + self.module(cre[0]).factory(cre[2]).create(cre[3]) def generate(self, out): with open(str(out), 'w') as f: diff --git a/openvino/itt/CMakeLists.txt b/openvino/itt/CMakeLists.txt index 22b287164fb552..85a8cf198749b7 100644 --- a/openvino/itt/CMakeLists.txt +++ b/openvino/itt/CMakeLists.txt @@ -18,37 +18,6 @@ set(TARGET_NAME itt) file(GLOB_RECURSE SOURCES "src/*.cpp" "src/*.hpp") -if(ENABLE_PROFILING_ITT) - if(DEFINED INTEL_VTUNE_DIR OR DEFINED ENV{INTEL_VTUNE_DIR}) - find_package(ITT - PATHS "${CMAKE_CURRENT_SOURCE_DIR}/cmake" - NO_DEFAULT_PATH) - if(NOT ITT_FOUND) - message(WARNING "Profiling option enabled, but no ITT library was found under INTEL_VTUNE_DIR") - endif() - else() - include(FetchContent) - FetchContent_Declare( - ext_ittapi - GIT_REPOSITORY https://github.com/intel/ittapi.git - GIT_TAG v3.18.6 - ) - - FetchContent_GetProperties(ext_ittapi) - if(NOT ext_ittapi_POPULATED) - FetchContent_Populate(ext_ittapi) - add_subdirectory(${ext_ittapi_SOURCE_DIR} ${ext_ittapi_BINARY_DIR}) - endif() - - target_compile_definitions(ittnotify INTERFACE ENABLE_PROFILING_ITT) - if (UNIX) - target_compile_options(ittnotify PRIVATE -Wno-undef) - endif() - - openvino_developer_export_targets(COMPONENT openvino_common TARGETS ittnotify) - endif() -endif() - add_library(${TARGET_NAME} STATIC ${SOURCES}) add_library(openvino::itt ALIAS ${TARGET_NAME}) diff --git a/scripts/install_dependencies/install_GST_dependencies.sh b/scripts/install_dependencies/install_GST_dependencies.sh deleted file mode 100755 index 994bd155252892..00000000000000 --- a/scripts/install_dependencies/install_GST_dependencies.sh +++ /dev/null @@ -1,189 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2020 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [ $EUID -ne 0 ]; then - echo "ERROR: this script must be run as root to install 3rd party packages." >&2 - echo "Please try again with \"sudo -E $0\", or as root." >&2 - exit 1 -fi - -params=$@ - -yes_or_no() { - if [ "$params" == "-y" ]; then - return 0 - fi - - while true; do - read -p "Add third-party repositories and install GStreamer Plugins (y/n): " yn - case $yn in - [Yy]*) return 0 ;; - [Nn]*) return 1 ;; - esac - done -} - -echo -echo "This script installs the following GStreamer 3rd-party dependencies:" -echo " 1. build dependencies for GStreamer plugin bad" -echo " 2. build dependencies for GStreamer plugin ugly" -echo " 3. 
build dependencies for GStreamer plugin vaapi" -echo - -if [ -f /etc/lsb-release ]; then - # Ubuntu - PKGS=( - libbluetooth-dev - libusb-1.0.0-dev - libass-dev - libbs2b-dev - libchromaprint-dev - liblcms2-dev - libssh2-1-dev - libdc1394-22-dev - libdirectfb-dev - libssh-dev - libdca-dev - libfaac-dev - libfaad-dev - libfdk-aac-dev - flite1-dev - libfluidsynth-dev - libgme-dev - libgsm1-dev - nettle-dev - libkate-dev - liblrdf0-dev - libde265-dev - libmjpegtools-dev - libmms-dev - libmodplug-dev - libmpcdec-dev - libneon27-dev - libofa0-dev - libopenal-dev - libopenexr-dev - libopenjp2-7-dev - libopenmpt-dev - libopenni2-dev - libdvdnav-dev - librtmp-dev - librsvg2-dev - libsbc-dev - libsndfile1-dev - libsoundtouch-dev - libspandsp-dev - libsrtp2-dev - libzvbi-dev - libvo-aacenc-dev - libvo-amrwbenc-dev - libwebrtc-audio-processing-dev - libwebp-dev - libwildmidi-dev - libzbar-dev - libnice-dev - libx265-dev - libxkbcommon-dev - libx264-dev - libmpeg2-4-dev - libdvdread-dev - libcdio-dev - libopencore-amrnb-dev - libopencore-amrwb-dev - liba52-0.7.4-dev - libsidplay1-dev - libva-dev - libxrandr-dev - libudev-dev - python-gi-dev \ - python3-dev - ) - apt update - apt install -y ${PKGS[@]} -else - # CentOS - PKGS=( - bluez-libs-devel - libusb-devel - libass-devel - libbs2b-devel - libchromaprint-devel - lcms2-devel - libssh2-devel - libdc1394-devel - libXext-devel - libssh-devel - libdca-devel - faac-devel - faad2-devel - fdk-aac-devel - flite-devel - fluidsynth-devel - game-music-emu-devel - gsm-devel - nettle-devel - kate-devel - liblrdf-devel - libde265-devel - mjpegtools-devel - libmms-devel - libmodplug-devel - libmpcdec-devel - neon-devel - libofa-devel - openal-soft-devel - OpenEXR-devel - openjpeg2-devel - openni-devel - libdvdnav-devel - librtmp-devel - librsvg2-devel - sbc-devel - libsndfile-devel - soundtouch-devel - spandsp-devel - libsrtp-devel - zvbi-devel - vo-amrwbenc-devel - webrtc-audio-processing-devel - wildmidi-devel - zbar-devel - libnice-devel - x265-devel - libxkbcommon-devel - x264-devel - libmpeg2-devel - libcdio-devel - opencore-amr-devel - libva-devel - python36-gobject-devel - python3-devel - ) - if yes_or_no; then - rpm --import http://li.nux.ro/download/nux/RPM-GPG-KEY-nux.ro - yum install -y epel-release - rpm -Uvh http://li.nux.ro/download/nux/dextop/el7/x86_64/nux-dextop-release-0-5.el7.nux.noarch.rpm - yum install -y ${PKGS[@]} - else - echo - echo "Plugins cannot be installed without adding repositories:" - echo " PM-GPG-KEY-nux, epel-release, nux-dextop-release-0-5." - echo - fi - exit -fi diff --git a/scripts/install_dependencies/install_openvino_dependencies.sh b/scripts/install_dependencies/install_openvino_dependencies.sh index eb0bfb8fd19fbf..bb185ce8b5cad7 100755 --- a/scripts/install_dependencies/install_openvino_dependencies.sh +++ b/scripts/install_dependencies/install_openvino_dependencies.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2018 - 2020 Intel Corporation +# Copyright (c) 2018 - 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,260 +16,356 @@ set -e -if [ $EUID -ne 0 ]; then - echo "ERROR: this script must be run as root to install 3rd party packages." >&2 - echo "Please try again with \"sudo -E $0\", or as root." 
>&2 - exit 1 -fi +#=================================================================================================== +# Option parsing -params=$@ +all_comp=(opencv_req opencv_opt python dev myriad dlstreamer installer pot cl_compiler) +os=${os:-auto} -yes_or_no_ffmpeg() { - if [ "$params" == "-y" ]; then - return 0 - fi +# public options +interactive=yes +dry= +extra= +print= +comp=() - while true; do - read -p "Add third-party RPM Fusion repository and install FFmpeg package (y/n): " yn - case $yn in - [Yy]*) return 0 ;; - [Nn]*) return 1 ;; - esac - done -} +# private options +keepcache= +selftest= -yes_or_no_gst_bad_ugly() { - if [ "$params" == "-y" ]; then - return 0 - fi +while :; do + case $1 in + -h|-\?|--help) + echo "Options:" + echo " -y non-interactive run (off)" + echo " -n dry-run, assume no (off)" + echo " -c= install component , can be repeated (${all_comp[*]})" + echo " -e add extra repositories (CentOS 7) (off)" + echo " -p print package list and exit (off)" + exit + ;; + -y) interactive= ;; + -n) dry=yes ;; + -c=?*) comp+=("${1#*=}") ;; + -e) extra=yes ;; + -p) print=yes ;; + --selftest) selftest=yes ;; + --keepcache) keepcache=yes ;; + *) break ;; + esac + shift +done - while true; do - read -p "Add third-party RPM Epel, Nux, Fusion, Forensics repositories and install dependencies for GStreamer Bad & Ugly Plugins (y/n): " yn - case $yn in - [Yy]*) return 0 ;; - [Nn]*) return 1 ;; - esac +# No components selected - install all +if [ ${#comp[@]} -eq 0 ]; then + comp=(${all_comp[@]}) +fi + +#=================================================================================================== +# Selftest + +if [ -n "$selftest" ] ; then + for image in centos:7 ubuntu:18.04 ubuntu:20.04 ; do + for opt in "-h" "-p" "-e -p" "-n" "-n -e" "-y" "-y -e" ; do + echo "||" + echo "|| Test $image / '$opt'" + echo "||" + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + docker run -it --rm \ + --volume ${SCRIPT_DIR}:/scripts:ro,Z \ + --volume yum-cache:/var/cache/yum \ + --volume apt-cache:/var/cache/apt/archives \ + -e DEBIAN_FRONTEND=noninteractive \ + $image \ + bash /scripts/${0##*/} $opt --keepcache + echo "||" + echo "|| Completed: $image / '$opt'" + echo "||" + done done -} - -if [ -f /etc/lsb-release ]; then - # Ubuntu - echo - echo "This script installs the following OpenVINO 3rd-party dependencies:" - echo " 1. GTK+, FFmpeg and GStreamer libraries used by OpenCV" - echo " 2. libusb library required for Myriad plugin for Inference Engine" - echo " 3. build dependencies for OpenVINO samples" - echo " 4. build dependencies for GStreamer Plugins" - echo - PKGS=( - cpio - build-essential - cmake - libusb-1.0-0-dev - libdrm-dev - libgstreamer1.0-0 + echo "Self test finished, to remove temporary docker volumes run: + 'docker volume rm yum-cache apt-cache'" + exit 0 +fi + +#=================================================================================================== +# OS detection + +if [ "$os" == "auto" ] ; then + os=$( . 
/etc/os-release ; echo "${ID}${VERSION_ID}" ) + case $os in + centos7|ubuntu18.04|ubuntu20.04) [ -z "$print" ] && echo "Detected OS: ${os}" ;; + *) echo "Unsupported OS: ${os:-detection failed}" >&2 ; exit 1 ;; + esac +fi + +#=================================================================================================== +# Collect packages + +extra_repos=() + +if [ "$os" == "ubuntu18.04" ] ; then + + pkgs_opencv_req=(libgtk-3-0 libgl1) + pkgs_python=(python3 python3-dev python3-venv python3-setuptools python3-pip) + pkgs_dev=(cmake g++ gcc libc6-dev make curl) + pkgs_myriad=(libusb-1.0-0) + pkgs_installer=(cpio) + pkgs_pot=() + pkgs_cl_compiler=(libtinfo5) + pkgs_opencv_opt=( + gstreamer1.0-plugins-bad gstreamer1.0-plugins-base gstreamer1.0-plugins-good + gstreamer1.0-plugins-ugly + gstreamer1.0-tools + libavcodec57 + libavformat57 + libavresample3 + libavutil55 + libgstreamer1.0-0 + libswscale4 + ) + pkgs_dlstreamer=( + ffmpeg + flex + gstreamer1.0-alsa gstreamer1.0-plugins-bad + gstreamer1.0-plugins-base + gstreamer1.0-plugins-good + gstreamer1.0-plugins-ugly gstreamer1.0-vaapi + gstreamer1.0-tools + libfaac0 + libfluidsynth1 + libgl-dev + libglib2.0 + libgstreamer1.0-0 + libnettle6 + libtag-extras1 + python3-gi + vainfo + ) + +elif [ "$os" == "ubuntu20.04" ] ; then + + pkgs_opencv_req=(libgtk-3-0 libgl1) + pkgs_python=(python3 python3-dev python3-venv python3-setuptools python3-pip) + pkgs_dev=(cmake g++ gcc libc6-dev make curl) + pkgs_myriad=(libusb-1.0-0) + pkgs_installer=(cpio) + pkgs_pot=(libblas-dev liblapack-dev gfortran) + pkgs_cl_compiler=(libtinfo5) + pkgs_opencv_opt=( + gstreamer1.0-plugins-bad + gstreamer1.0-plugins-base + gstreamer1.0-plugins-good + gstreamer1.0-plugins-ugly + gstreamer1.0-tools + libavcodec58 + libavformat58 + libavresample4 + libavutil56 + libgstreamer1.0-0 + libswscale5 + ) + pkgs_dlstreamer=( ffmpeg + flex + gstreamer1.0-alsa + gstreamer1.0-libav + gstreamer1.0-plugins-bad + gstreamer1.0-plugins-base + gstreamer1.0-plugins-good + gstreamer1.0-plugins-ugly + gstreamer1.0-vaapi + gstreamer1.0-tools + libfaac0 + libfluidsynth2 + libgl-dev + libglib2.0-0 + libgstreamer-plugins-base1.0-dev + libgstreamer1.0-0 + libgstrtspserver-1.0-dev + libnettle7 + libopenexr24 + libtag-extras1 + python3-gi + python3-gst-1.0 + vainfo ) - system_ver=$(cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2) - if [ "$system_ver" = "16.04" ]; then - PKGS+=( libgtk2.0-0 ) - else - if [ "$system_ver" = "20.04" ]; then - PKGS+=( gstreamer1.0-plugins-ugly - gstreamer1.0-libav - libgstreamer-plugins-base1.0-dev - gstreamer1.0-alsa - libgstrtspserver-1.0-dev - python3-gst-1.0 - libfluidsynth2 - libnettle7 - libopenexr24 - python3.8 - libpython3.8 - libglib2.0-0 - ) - elif [ "$system_ver" = "18.04" ]; then - PKGS+=( libfluidsynth1 - libnettle6 - libopenexr22 - gstreamer1.0-plugins-ugly - gstreamer1.0-alsa - libglib2.0 - ) - fi - PKGS+=( flex - libgl-dev - libtag-extras1 - libusb-1.0-0-dev - libfaac0 - python3-gi - libgtk-3-0 - ) - fi - apt update - # shellcheck disable=SC2068 - apt install -y ${PKGS[@]} -else - # CentOS - echo - echo "This script installs the following OpenVINO 3rd-party dependencies:" - echo " 1. GTK+ and GStreamer libraries used by OpenCV" - echo " 2. libusb library required for Myriad plugin for Inference Engine" - echo " 3. Python 3.6 for Model Optimizer" - echo " 4. gcc 4.8.5 and other build dependencies for OpenVINO samples" - echo " 5. 
build dependencies for GStreamer Plugins" - echo - PKGS=( - libusbx-devel - gtk2 + +elif [ "$os" == "centos7" ] ; then + + # find -name *.so -exec objdump -p {} \; | grep NEEDED | sort -u | cut -c 23- | xargs -t -n1 yum -q whatprovides + + pkgs_opencv_req=(gtk2) + pkgs_python=(python3 python3-devel python3-setuptools python3-pip) + pkgs_dev=(gcc gcc-c++ make glibc libstdc++ libgcc cmake curl) + pkgs_myriad=(libusbx) + pkgs_installer=() + pkgs_pot=() + pkgs_cl_compiler=() + pkgs_opencv_opt=( gstreamer1 - gstreamer1-plugins-good gstreamer1-plugins-bad-free - gcc - gcc-c++ - make - glibc-static + gstreamer1-plugins-good + gstreamer1-plugins-ugly-free + ) + pkgs_dlstreamer=( + OpenEXR-libs + alsa-lib + boost-regex + bzip2-libs + cairo + cdparanoia-libs + flac-libs + flite + gdk-pixbuf2 + glib2 glibc - libstdc++-static - libstdc++ - libstdc++ - libgcc - cmake - python36 - python36-pip - glib2-devel - flex gmp - gsl - libcap - libcap - gettext - libXrandr + gsm + gstreamer1 + gstreamer1-plugins-bad-free + gstreamer1-plugins-base + ilmbase libX11 - iso-codes - mesa-libEGL - mesa-libGLES - mesa-libGL - libgudev1 - libtheora - cdparanoia - pango - mesa-libgbm - alsa-lib + libXdamage + libXext + libXfixes + libXrandr + libXrender + libXv + libdrm + libdv + libgcc + libglvnd-glx libjpeg-turbo + libogg + libpng + librdkafka + librsvg2 + libsndfile + libsoup + libstdc++ + libtheora + libuuid + libv4l libvisual - libXv - opus libvorbis - patch - bzip2 - libv4l - flac - gdk-pixbuf2 - libdv - mpg123 - libraw1394 - libavc1394 - libiec61883 + libxml2 + mpg123-libs + neon + nettle + openjpeg2 + openssl-libs + opus + orc + pango pulseaudio-libs - libsoup + sbc + soundtouch speex wavpack - boost-regex-1.53.0 + xz-libs + zlib ) - yum install -y ${PKGS[@]} - # Thirdparty repositories for installing GStreamer Bad & Ugly Plugins dependencies. - if yes_or_no_gst_bad_ugly; then - GST_BAD_UGLY_PKGS=( - bluez-libs - libusb + if [ -n "$extra" ] ; then + # 1 RPMFusion + extra_repos+=(https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm) + pkgs_opencv_opt+=(ffmpeg-libs) + pkgs_dlstreamer+=( + libde265 + libmms + librtmp + opencore-amr + vo-amrwbenc + ) + # 2 EPEL + extra_repos+=(https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm) + pkgs_dlstreamer+=( + fluidsynth-libs + game-music-emu libass libbs2b libchromaprint - lcms2 - libssh2 - libdc1394 - libXext - libssh - libdca - faac - fdk-aac - flite - fluidsynth - game-music-emu - gsm - nettle - kate - liblrdf - libde265 - mjpegtools - libmms libmodplug - libmpcdec - neon openal-soft - OpenEXR - openjpeg2 - openni - libdvdnav - librtmp - librsvg2 - sbc - libsndfile - soundtouch + paho-c spandsp - libsrtp - zvbi - vo-amrwbenc - webrtc-audio-processing - wildmidi zbar - libnice - libxkbcommon - opencore-amr - libva - python36-gobject - python3-devel + zvbi + ) + # 3 ForensicsTools + extra_repos+=(https://forensics.cert.org/cert-forensics-tools-release-el7.rpm) + pkgs_dlstreamer+=( + faac + fdk-aac ) - yum install -y epel-release - rpm -Uvh https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm - RPMFUSION_IS_INSTALLED=1 - yum install -y https://forensics.cert.org/cert-forensics-tools-release-el7.rpm - yum install -y ${GST_BAD_UGLY_PKGS[@]} - else - echo "Dependencies for GStreamer Ugly & Bad plugins installation skipped." - echo fi - echo - echo "Intel(R) Distribution of OpenVINO(TM) toolkit can use FFmpeg for processing video streams with OpenCV. 
Please select your preferred method for installing FFmpeg:" - echo - echo "Option 1: Allow installer script to add a third party repository, RPM Fusion (https://rpmfusion.org/), which contains FFmpeg. FFmpeg rpm package will be installed from this repository. " - echo "WARNING: This repository is NOT PROVIDED OR SUPPORTED by Intel or CentOS. Neither Intel nor CentOS has control over this repository. Terms governing your use of FFmpeg can be found here: https://www.ffmpeg.org/legal.html " - echo "Once added, this repository will be enabled on your operating system and can thus receive updates to all packages installed from it. " - echo - echo "Consider the following ways to prevent unintended 'updates' from this third party repository from over-writing some core part of CentOS:" - echo "a) Only enable these archives from time to time, and generally leave them disabled. See: man yum" - echo "b) Use the exclude= and includepkgs= options on a per sub-archive basis, in the matching .conf file found in /etc/yum.repos.d/ See: man yum.conf" - echo "c) The yum Priorities plug-in can prevent a 3rd party repository from replacing base packages, or prevent base/updates from replacing a 3rd party package." - echo - echo "Option 2: Skip FFmpeg installation." - echo - - if yes_or_no_ffmpeg; then - if [[ -z $RPMFUSION_IS_INSTALLED ]]; then - yum install -y epel-release - rpm -Uvh https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm - fi - yum install -y ffmpeg +else + echo "Internal script error: invalid OS after check (package selection)" >&2 + exit 3 +fi + +#=================================================================================================== +# Gather packages and print list + +pkgs=() +for comp in ${comp[@]} ; do + var=pkgs_${comp}[@] + pkgs+=(${!var}) +done + +if [ ${#pkgs[@]} -eq 0 ]; then + if [ -n "$print" ] ; then + echo "No packages to install" >&2 + exit 1 else - echo "FFmpeg installation skipped. You may build FFmpeg from sources as described here: https://trac.ffmpeg.org/wiki/CompilationGuide/Centos" - echo + echo "No packages to install" + exit 0 fi - exit fi + +if [ -n "$print" ] ; then + echo "${pkgs[*]}" + exit 0 +fi + +#=================================================================================================== +# Actual installation + +if [ $EUID -ne 0 ]; then + echo "ERROR: this script must be run as root to install 3rd party packages." >&2 + echo "Please try again with \"sudo -E $0\", or as root." >&2 + exit 1 +fi + +iopt= + +if [ "$os" == "ubuntu18.04" ] || [ "$os" == "ubuntu20.04" ] ; then + + [ -z "$interactive" ] && iopt="-y" + [ -n "$dry" ] && iopt="--dry-run" + [ -n "$keepcache" ] && rm -f /etc/apt/apt.conf.d/docker-clean + + apt-get update && apt-get install --no-install-recommends $iopt ${pkgs[@]} + +elif [ "$os" == "centos7" ] ; then + + [ -z "$interactive" ] && iopt="--assumeyes" + [ -n "$dry" ] && iopt="--downloadonly" + [ -n "$keepcache" ] && iopt="$iopt --setopt=keepcache=1" + [ ${#extra_repos[@]} -ne 0 ] && yum localinstall $iopt --nogpgcheck ${extra_repos[@]} + + yum install $iopt ${pkgs[@]} + +else + echo "Internal script error: invalid OS after check (package installation)" >&2 + exit 3 +fi + +exit 0 diff --git a/scripts/setupvars/setupvars.bat b/scripts/setupvars/setupvars.bat index 7503ee101c413e..e31aeb91505232 100644 --- a/scripts/setupvars/setupvars.bat +++ b/scripts/setupvars/setupvars.bat @@ -14,7 +14,6 @@ :: See the License for the specific language governing permissions and :: limitations under the License. 
-setlocal enableDelayedExpansion set ROOT=%~dp0 call :GetFullPath "%ROOT%\.." ROOT @@ -23,12 +22,13 @@ set SCRIPT_NAME=%~nx0 set "INTEL_OPENVINO_DIR=%ROOT%" set "INTEL_CVSDK_DIR=%INTEL_OPENVINO_DIR%" +set "python_version=" + :: command line arguments parsing :input_arguments_loop if not "%1"=="" ( if "%1"=="-pyver" ( - set python_version=%2 - echo python_version = !python_version! + set "python_version=%2" shift ) shift @@ -83,8 +83,9 @@ if errorlevel 1 ( ) :: Check Python version if user did not pass -pyver -if not defined python_version ( - for /F "tokens=* USEBACKQ" %%F IN (`python -c "import sys; print(str(sys.version_info[0])+'.'+str(sys.version_info[1]))"`) DO ( + +if "%python_version%" == "" ( + for /F "tokens=* USEBACKQ" %%F IN (`python -c "import sys; print(str(sys.version_info[0])+'.'+str(sys.version_info[1]))" 2^>^&1`) DO ( set python_version=%%F ) ) diff --git a/tests/conditional_compilation/conftest.py b/tests/conditional_compilation/conftest.py index 3a0ae137029f95..7439aa53e7a58a 100644 --- a/tests/conditional_compilation/conftest.py +++ b/tests/conditional_compilation/conftest.py @@ -7,7 +7,7 @@ """ Pytest configuration for compilation tests. Sample usage: -python3 -m pytest --artifacts ./compiled --models_root= \ +python3 -m pytest --artifacts ./compiled --test_conf= \ --sea_runtool=./IntelSEAPI/runtool/sea_runtool.py \ --benchmark_app=./bin/benchmark_app test_collect.py """ @@ -17,32 +17,30 @@ from pathlib import Path import pytest +import yaml # add ../lib to imports sys.path.insert( 0, str((Path(getsourcefile(lambda: 0)) / ".." / ".." / "lib").resolve(strict=True)) ) -# Using models from https://github.com/openvinotoolkit/testdata -# $find models -wholename "*.xml" -TESTS = [ - {"path": "models/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224_i8.xml"}, - {"path": "models/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224_i8.xml"}, - {"path": "models/inception_v3/inception_v3_i8.xml"}, - {"path": "models/resnet_v1_50/resnet_v1_50_i8.xml"}, - {"path": "models/test_model/test_model_fp16.xml"}, - {"path": "models/test_model/test_model_fp32.xml"}, -] +from path_utils import expand_env_vars # pylint: disable=import-error def pytest_addoption(parser): """ Define extra options for pytest options """ parser.addoption( - "--models_root", required=True, type=Path, help="Path to models root directory" + "--test_conf", + type=Path, + default=Path(__file__).parent / "test_config.yml", + help="Path to models root directory" ) parser.addoption( - "--sea_runtool", required=True, type=Path, help="Path to sea_runtool.py" + "--sea_runtool", + required=True, + type=Path, + help="Path to sea_runtool.py" ) parser.addoption( "--benchmark_app", @@ -50,6 +48,12 @@ def pytest_addoption(parser): type=Path, help="Path to the benchmark_app tool", ) + parser.addoption( + "--collector_dir", + required=True, + type=Path, + help="Path to a directory with a collector binary", + ) parser.addoption( "-A", "--artifacts", @@ -65,14 +69,17 @@ def pytest_generate_tests(metafunc): params = [] ids = [] - for test in TESTS: + with open(metafunc.config.getoption('test_conf'), "r") as file: + test_cases = yaml.safe_load(file) + + for test in test_cases: extra_args = {} - path = test["path"] + model_path = test["model"]["path"] if "marks" in test: extra_args["marks"] = test["marks"] - params.append(pytest.param(Path(path), **extra_args)) - ids = ids + [path] + params.append(pytest.param(Path(expand_env_vars(model_path)), **extra_args)) + ids = ids + [model_path] metafunc.parametrize("model", params, ids=ids) @@ -89,9 +96,9 @@ def 
benchmark_app(request): @pytest.fixture(scope="session") -def models_root(request): +def collector_dir(request): """Fixture function for command-line option.""" - return request.config.getoption("models_root") + return request.config.getoption("collector_dir") @pytest.fixture(scope="session") diff --git a/tests/conditional_compilation/test_collect.py b/tests/conditional_compilation/test_collect.py index 4088c2f7d220a8..1111f041022617 100644 --- a/tests/conditional_compilation/test_collect.py +++ b/tests/conditional_compilation/test_collect.py @@ -11,7 +11,7 @@ from proc_utils import cmd_exec # pylint: disable=import-error -def test_cc_collect(model, sea_runtool, benchmark_app, models_root, artifacts): +def test_cc_collect(model, sea_runtool, benchmark_app, collector_dir, artifacts): """ Test conditional compilation statistics collection """ out = artifacts / model.parent / model.stem @@ -26,10 +26,11 @@ def test_cc_collect(model, sea_runtool, benchmark_app, models_root, artifacts): str(sea_runtool), f"-o={out}", "-f=stat", + f"--bindir={collector_dir}", "!", str(benchmark_app), "-d=CPU", - f"-m={models_root / model}", + f"-m={model}", "-niter=1", "-nireq=1", ] diff --git a/tests/conditional_compilation/test_config.yml b/tests/conditional_compilation/test_config.yml new file mode 100644 index 00000000000000..ac03e092fe1c14 --- /dev/null +++ b/tests/conditional_compilation/test_config.yml @@ -0,0 +1,15 @@ +# Using models from https://github.com/openvinotoolkit/testdata +# $find models -wholename "*.xml" + +- model: + path: ${TESTDATA}/models/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224_i8.xml +- model: + path: ${TESTDATA}/models/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224_i8.xml +- model: + path: ${TESTDATA}/models/inception_v3/inception_v3_i8.xml +- model: + path: ${TESTDATA}/models/resnet_v1_50/resnet_v1_50_i8.xml +- model: + path: ${TESTDATA}/models/test_model/test_model_fp16.xml +- model: + path: ${TESTDATA}/models/test_model/test_model_fp32.xml diff --git a/tests/conditional_compilation/test_infer.py b/tests/conditional_compilation/test_infer.py index b7d66f72015233..85df816b94fc86 100644 --- a/tests/conditional_compilation/test_infer.py +++ b/tests/conditional_compilation/test_infer.py @@ -8,10 +8,10 @@ from proc_utils import cmd_exec # pylint: disable=import-error -def test_infer(model, models_root, benchmark_app): +def test_infer(model, benchmark_app): """ Test inference with conditional compiled binaries """ returncode, _ = cmd_exec( - [str(benchmark_app), "-d=CPU", f"-m={models_root / model}", "-niter=1", "-nireq=1"] + [str(benchmark_app), "-d=CPU", f"-m={model}", "-niter=1", "-nireq=1"] ) assert returncode == 0, f"Command exited with non-zero status {returncode}" diff --git a/tests/fuzz/fuzz-testhelper/main-testhelper.cc b/tests/fuzz/fuzz-testhelper/main-testhelper.cc index d6a0e94a04956d..4f32da064d9890 100644 --- a/tests/fuzz/fuzz-testhelper/main-testhelper.cc +++ b/tests/fuzz/fuzz-testhelper/main-testhelper.cc @@ -25,12 +25,12 @@ the inputs. 
#include #include #include -#ifdef WIN32 -#include +#ifdef _WIN32 +# include #else // WIN32 -#include -#include -#include +# include +# include +# include #endif // WIN32 /// Fuzzing target @@ -45,7 +45,7 @@ std::string basename(std::string const& path) { /// Get directory content std::vector list_dir(std::string const& path) { std::vector res; -#ifdef WIN32 +#ifdef _WIN32 WIN32_FIND_DATA find_data; HANDLE find_handle; find_handle = FindFirstFile((path + "\\*").c_str(), &find_data); @@ -73,7 +73,7 @@ std::vector list_dir(std::string const& path) { // Check if file by given path is a directory. bool is_dir(std::string const& path) { -#ifdef WIN32 +#ifdef _WIN32 return 0 != (FILE_ATTRIBUTE_DIRECTORY & GetFileAttributes(path.c_str())); #else // WIN32 struct stat stat_res = {0}; diff --git a/tests/lib/path_utils.py b/tests/lib/path_utils.py new file mode 100644 index 00000000000000..f65182761e20d0 --- /dev/null +++ b/tests/lib/path_utils.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" Common utilities for working with processes. +""" + +import os + + +def expand_env_vars(obj): + """Expand environment variables in provided object.""" + + if isinstance(obj, list): + for i, value in enumerate(obj): + obj[i] = expand_env_vars(value) + elif isinstance(obj, dict): + for name, value in obj.items(): + obj[name] = expand_env_vars(value) + else: + obj = os.path.expandvars(obj) + return obj diff --git a/tests/time_tests/scripts/requirements.txt b/tests/time_tests/scripts/requirements.txt index 3101697065ff1c..a9d757b4faffac 100644 --- a/tests/time_tests/scripts/requirements.txt +++ b/tests/time_tests/scripts/requirements.txt @@ -1 +1 @@ -PyYAML==5.3.1 \ No newline at end of file +PyYAML==5.4.1 \ No newline at end of file diff --git a/tests/time_tests/test_runner/requirements.txt b/tests/time_tests/test_runner/requirements.txt index 583031358bf840..bdb557e84976b0 100644 --- a/tests/time_tests/test_runner/requirements.txt +++ b/tests/time_tests/test_runner/requirements.txt @@ -1,5 +1,5 @@ pytest==4.0.1 attrs==19.1.0 # required for pytest==4.0.1 to resolve compatibility issues -PyYAML==5.3.1 +PyYAML==5.4.1 jsonschema==3.2.0 distro==1.5.0 \ No newline at end of file diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt new file mode 100644 index 00000000000000..efaa7c3ecdd8e0 --- /dev/null +++ b/thirdparty/CMakeLists.txt @@ -0,0 +1,18 @@ +# ****************************************************************************** +# Copyright 2017-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ****************************************************************************** + +add_subdirectory(ittapi) +add_subdirectory(itt_collector) diff --git a/thirdparty/itt_collector/CMakeLists.txt b/thirdparty/itt_collector/CMakeLists.txt new file mode 100644 index 00000000000000..57d6c55c39ddd4 --- /dev/null +++ b/thirdparty/itt_collector/CMakeLists.txt @@ -0,0 +1,19 @@ +# ****************************************************************************** +# Copyright 2017-2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ****************************************************************************** + +if(ENABLE_PROFILING_ITT AND SELECTIVE_BUILD STREQUAL "COLLECT") + add_subdirectory(sea_itt_lib) +endif() diff --git a/thirdparty/itt_collector/runtool/collectors/ftrace.py b/thirdparty/itt_collector/runtool/collectors/ftrace.py new file mode 100644 index 00000000000000..2923ffa3345708 --- /dev/null +++ b/thirdparty/itt_collector/runtool/collectors/ftrace.py @@ -0,0 +1,226 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +import os +import sys +import glob +import shutil +import traceback +import subprocess + +# http://www.brendangregg.com/perf.html +# sudo perf probe --funcs + +sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))) +import sea + +from sea_runtool import Collector, Progress, format_bytes + + +def time_sync(): + sea.ITT('lin').time_sync() + +supported_events = [ + "binder_locked", + "binder_unlock", + "binder_lock", + "binder_transaction", + "binder_transaction_received", + "memory_bus_usage", + "clock_set_rate", + "cpufreq_interactive_up", + "cpufreq_interactive_down", + "cpufreq_interactive_already", + "cpufreq_interactive_notyet", + "cpufreq_interactive_setspeed", + "cpufreq_interactive_target", + "cpufreq_interactive_boost", + "cpufreq_interactive_unboost", + "f2fs_write_begin", + "f2fs_write_end", + "f2fs_sync_file_enter", + "f2fs_sync_file_exit", + "ext4_sync_file_enter", + "ext4_sync_file_exit", + "ext4_da_write_begin", + "ext4_da_write_end", + "block_rq_issue", + "block_rq_complete", + "drm_vblank_event", + "exynos_busfreq_target_int", + "exynos_busfreq_target_mif", + "exynos_page_flip_state", + "i915_gem_object_create", + "i915_gem_object_bind", + "i915_gem_object_unbind", + "i915_gem_object_change_domain", + "i915_gem_object_pread", + "i915_gem_object_pwrite", + "i915_gem_object_fault", + "i915_gem_object_clflush", + "i915_gem_object_destroy", + "i915_gem_ring_dispatch", + "i915_gem_ring_flush", + "i915_gem_request", + "i915_gem_request_add", + "i915_gem_request_complete", + "i915_gem_request_retire", + "i915_gem_request_wait_begin", + "i915_gem_request_wait_end", + "i915_gem_ring_wait_begin", + "i915_gem_ring_wait_end", + "i915_mvp_read_req", + "i915_reg_rw", + "i915_flip_request", + "i915_flip_complete", + "intel_gpu_freq_change", + "irq_handler_entry", + "irq_handler_exit", + "softirq_raise", + "softirq_entry", + "softirq_exit", + "ipi_entry", + "ipi_exit", + "graph_ent", + "graph_ret", + "mali_dvfs_event", + "mali_dvfs_set_clock", + "mali_dvfs_set_voltage", + "tracing_mark_write:mali_driver", + "mm_vmscan_kswapd_wake", + "mm_vmscan_kswapd_sleep", + "mm_vmscan_direct_reclaim_begin", + "mm_vmscan_direct_reclaim_end", + "workqueue_execute_start", + "workqueue_execute_end", + "power_start", + "power_frequency", + "cpu_frequency", + "cpu_idle", + "regulator_enable", + "regulator_enable_delay", + "regulator_enable_complete", + "regulator_disable", + "regulator_disable_complete", + "regulator_set_voltage", + "regulator_set_voltage_complete", + "sched_switch", + "sched_wakeup", + "workqueue_execute_start", + "workqueue_execute_end", + "workqueue_queue_work", + "workqueue_activate_work", +] + + +class FTrace(Collector): + def __init__(self, args, remote=False): + Collector.__init__(self, args) + self.remote = remote + self.event_list = [] + self.file = None + self.perf_file = None + self.perf_proc = None + for event in supported_events: + for path in glob.glob('/sys/kernel/debug/tracing/events/*/%s/enable' % event): + self.event_list.append(path) + + def echo(self, what, where): + self.log("echo %s > %s" % (what, where)) + try: + if self.remote: + self.remote.execute('echo %s > %s' % (what, where)) + else: + with open(where, "w") as file: + file.write(what) + except: + self.log("Failed: " + traceback.format_exc()) + return False + return True + + def start(self): + if not self.echo("nop", "/sys/kernel/debug/tracing/current_tracer"): + self.log("Warning: failed to access ftrace subsystem") + return + self.file = os.path.join(self.args.output, 'nop-%s.ftrace' 
% (self.args.cuts[0] if self.args.cuts else '0')) + + self.echo("0", "/sys/kernel/debug/tracing/tracing_on") + self.echo("nop", "/sys/kernel/debug/tracing/current_tracer") # google chrome understands this format + self.echo("", "/sys/kernel/debug/tracing/set_event") # disabling all events + self.echo("", "/sys/kernel/debug/tracing/trace") # cleansing ring buffer (we need it's header only) + if self.args.ring: + self.echo("%d" % (self.args.ring * 1024), "/sys/kernel/debug/tracing/buffer_size_kb") + + # best is to write sync markers here + self.echo("1", "/sys/kernel/debug/tracing/tracing_on") # activate tracing + time_sync() + self.echo("0", "/sys/kernel/debug/tracing/tracing_on") # deactivate tracing + # saving first part of synchronization as it will be wiped out in ring + self.copy_from_target("/sys/kernel/debug/tracing/trace", self.file) + self.echo("", "/sys/kernel/debug/tracing/trace") # cleansing ring buffer again + + for event in self.event_list: # enabling only supported + self.echo("1", event) + + for path in glob.glob('/sys/kernel/debug/dri/*/i915_mvp_enable'): # special case for Intel GPU events + self.echo("1", path) + self.echo("1", "/sys/kernel/debug/tracing/tracing_on") + if self.args.stacks and self.args.target: + self.perf_file = os.path.join(self.args.output, 'perf-%s.data' % (self.args.cuts[0] if self.args.cuts else '0')) + if os.path.exists(self.perf_file): + os.remove(self.perf_file) + cmd = 'perf record -a -g -o "%s" --pid=%s' % (self.perf_file, self.args.target) + self.log(cmd) + self.perf_proc = subprocess.Popen(cmd, shell=True, stdout=self.get_output(), stderr=self.get_output(), preexec_fn=os.setpgrp) + + def copy_from_target(self, what, where): + self.log("copy %s > %s" % (what, where)) + if self.remote: + self.remote.copy('%s:%s' % (self.args.ssh, what), where) + else: + shutil.copy(what, where) + + def stop(self, wait=True): + results = [] + if self.perf_proc: + self.perf_proc.wait() + if os.path.exists(self.perf_file): + results.append(self.perf_file + '.perf') + with open(results[-1], 'wb') as file: + self.execute('perf script -F comm,tid,pid,time,ip,sym,dso,symoff --show-kernel-path --demangle-kernel --full-source-path -i "%s"' % self.perf_file, stdout=file) + os.remove(self.perf_file) + + if not self.file: + return results + time_sync() + self.echo("0", "/sys/kernel/debug/tracing/tracing_on") + for path in glob.glob('/sys/kernel/debug/dri/*/i915_mvp_enable'): # special case for Intel GPU events + self.echo("0", path) + file_name = os.path.join(self.args.output, "tmp.ftrace") + self.copy_from_target("/sys/kernel/debug/tracing/trace", file_name) + self.log("append %s > %s" % (file_name, self.file)) + with open(file_name) as file_from, open(self.file, 'a') as file_to: + shutil.copyfileobj(file_from, file_to) + os.remove(file_name) + results.append(self.file) + self.execute('chmod -R a+rwX "%s"' % self.args.output) + return results + + +COLLECTOR_DESCRIPTORS = [{ + 'format': 'ftrace', + 'available': True, + 'collector': FTrace +}] diff --git a/thirdparty/itt_collector/runtool/collectors/osx.py b/thirdparty/itt_collector/runtool/collectors/osx.py new file mode 100644 index 00000000000000..d6d1f77b7165eb --- /dev/null +++ b/thirdparty/itt_collector/runtool/collectors/osx.py @@ -0,0 +1,827 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +from __future__ import print_function +import re +import os +import sys +import time +from datetime import datetime, timedelta +import shutil +import subprocess +import threading +sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))) +from sea_runtool import Collector, is_domain_enabled, message, get_original_env + +""" + sudo dtrace -l | perl -pe 's/^.*?\S+\s+(\S+?)([0-9]|\s).*/\1/' | sort | uniq > /tmp/dtrace_providers.txt + sudo dtrace -l > /tmp/dtrace_list.txt + dtrace -n 'fbt:::entry { @[probefunc] = count(); }' -c 'ping host' + http://www.brendangregg.com/DTrace/DTrace-cheatsheet.pdf + + https://docs.oracle.com/cd/E19253-01/817-6223/chp-variables-5/index.html + TODO: printf("%s: called from %a\n", probefunc, caller); + objc_runtime$target::: { ustack(); } /*objc_exception_throw*/ + pid$target::objc_msgSend:entry + sudo dtrace -qn 'fbt::*vent13k*:entry/arg3/{printf("%d\n",arg2)}' # keylogger +""" + +DSCRIPT_HEADER = r""" +#pragma D option nolibs +#define GREEDY_ON ++self->greedy_enabled +#define GREEDY_OFF self->greedy_enabled = (self->greedy_enabled > 0) ? (self->greedy_enabled - 1) : self->greedy_enabled + +BEGIN +{ + self->greedy_enabled = 0; +} + + +""" + +dtrace_context_switch = r""" + +/* +off-cpu + +Probe that fires when the current CPU is about to end execution of a thread. +The curcpu variable indicates the current CPU. +The curlwpsinfo variable indicates the thread that is ending execution. +The curpsinfo variable describes the process containing the current thread. +The lwpsinfo_t structure of the thread that the current CPU will next execute is pointed to by args[0]. +The psinfo_t of the process containing the next thread is pointed to by args[1]. 
+*/ + +sched:::off-cpu +{ + printf( + "%x\toff\t%x\t%x\t%x\t%s\t%x\t%x\t%s\n", machtimestamp, curcpu->cpu_id, + curlwpsinfo->pr_lwpid, curlwpsinfo->pr_pri, curpsinfo->pr_fname, + args[0]->pr_lwpid, args[0]->pr_pri, args[1]->pr_fname + ); +} +""" + +OFF_CPU_STACKS = r""" +sched:::off-cpu +/pid == $target/ +{ + printf("%x\tkstack\t%x\t%x:", machtimestamp, pid, tid); + stack(); + printf("\n%x\tustack\t%x\t%x:", machtimestamp, pid, tid); + ustack(); + /* + printf("\n%x\tjstack\t%x\t%x:", machtimestamp, pid, tid); + jstack(); //TODO: enable better support for jstack-s + */ + printf("\n"); +} +""" + +dtrace_wakeup = r""" +sched:::wakeup +/curpsinfo->pr_pid == $target || args[1]->pr_pid == $target/ +{ + printf("%x\twkp\t%x\t%x\t%s\t%x\t%s\t%x\t%x\t%x\t%x\n", machtimestamp, + curpsinfo->pr_pid, curlwpsinfo->pr_lwpid, + execname, cpu, + stringof(args[1]->pr_fname), + args[1]->pr_pid, args[0]->pr_lwpid, + args[0]->pr_stype, args[0]->pr_wchan + ); +} + +""" + + +osxaskpass = r"""#!/bin/bash +osascript -e 'Tell application "System Events" to display dialog "Password:" default answer "" with hidden answer with title "DTrace requires root priveledges"' -e 'text returned of result' 2>/dev/null +""" + + +pid_dtrace_hooks = [r""" +pid$target::*dtSEAHookScope*:entry /*{UMD_STACKS}*/ +{ + printf( + "%x\te\t%x\t%x\t%s\t%s\n", machtimestamp, pid, tid, copyinstr(arg1), copyinstr(arg2) + ); +} +""", r""" +pid$target::*dtSEAHookEndScope*:entry +{ + printf( + "%x\tr\t%x\t%x\t%s\t%s\n", machtimestamp, pid, tid, copyinstr(arg0), copyinstr(arg1) + ); +} +""", r""" +/* +pid$target::*dtSEAHookArgStr*:entry +{ + printf( + "%x\targ\t%x\t%x\t%s\t%s\n", + machtimestamp, pid, tid, copyinstr(arg0), copyinstr(arg1) + ); +} + +pid$target::*dtSEAHookArgInt*:entry +{ + printf( + "%x\targ\t%x\t%x\t%s\t%d\n", + machtimestamp, pid, tid, copyinstr(arg0), arg1 + ); +} +*/ + +""" + +] + +pid_dtrace_hooks += [ # XXX +r""" +objc$target:::entry +/*{CONDITIONS}*/ +{ + printf( + "%x\te\t%x\t%x\tobjc\t%s%s\n", machtimestamp, pid, tid, probemod, probefunc + ); + /*{ARGUMENTS}*/ +} +""", r""" +objc$target:::return +/*{CONDITIONS}*/ +{ + printf( + "%x\tr\t%x\t%x\tobjc\t%s%s\n", machtimestamp, pid, tid, probemod, probefunc + ); +} +""" +] if 0 else [] + + +FOLLOW_CHILD = r""" +//https://www.synack.com/2015/11/17/monitoring-process-creation-via-the-kernel-part-i/ +proc:::exec-success /$target == curpsinfo->pr_ppid/{ + printf("%x\tfollowchild\t%x\t%x\t%s\t%x\t%x\t%s\n", machtimestamp, pid, tid, probename, curpsinfo->pr_ppid, curpsinfo->pr_pid, curpsinfo->pr_psargs); + system("printf \"%d\n\" >> /*{FOLLOW_CHILD}*/", curpsinfo->pr_pid); +} + +proc:::exec /$target == curpsinfo->pr_ppid/{ + printf("%x\tfollowchild\t%x\t%x\t%s\t%x\t%x\t%s\n", machtimestamp, pid, tid, probename, curpsinfo->pr_ppid, curpsinfo->pr_pid, curpsinfo->pr_psargs); + system("printf \"%d\n\" >> /*{FOLLOW_CHILD}*/", curpsinfo->pr_pid); +} + +syscall::exec*:return /$target == curpsinfo->pr_ppid/ +{ + printf( + "%x\tfollowchild\t%x\t%x\t%s\t%s\t%s\n", machtimestamp, pid, tid, probemod, probefunc, probename + ); +} + +syscall::fork:return /$target == curpsinfo->pr_ppid/ +{ + printf( + "%x\tfollowchild\t%x\t%x\t%s\t%s\t%s\n", machtimestamp, pid, tid, probemod, probefunc, probename + ); + system("printf \"%d\n\" >> /*{FOLLOW_CHILD}*/", curpsinfo->pr_pid); +} + +""" + +BRACKET_FUNC = r""" +pid$target:/*{M:F}*/:entry /*{UMD_STACKS}*/ +/*{CONDITIONS}*/ +{ + printf( + "%x\te\t%x\t%x\t%s\t%s\n", machtimestamp, pid, tid, probemod, probefunc + ); + /*{ARGUMENTS}*/ + GREEDY_ON; +} +""", r""" 
+pid$target:/*{M:F}*/:return +/*{CONDITIONS}*/ +{ + GREEDY_OFF; + + printf( + "%x\tr\t%x\t%x\t%s\t%s\n", machtimestamp, pid, tid, probemod, probefunc + ); +} +""" + + +def bracket_hook(module='', function=''): + res = [] + for item in BRACKET_FUNC: + res.append(item.replace('/*{M:F}*/', '%s:%s' % (module, function))) + return res + + +for mask in ['JavaScriptCore', '*GL*:*gl*', '*GL*:*GL*', 'Metal', '*MTLDriver', '*GLDriver']: # ,'mdtest', 'libigdmd.dylib' + pid_dtrace_hooks += bracket_hook(*mask.split(':')) + + +# TODO: add opencl_api & opencl_cpu providers + +IO_HOOKS = r""" + +fsinfo:::open,fsinfo:::close +{ + printf( + "%x\tio\t%x\t%x\t%s\t%s\n", machtimestamp, pid, tid, probename, stringof(args[0]->fi_pathname) + ); +} + +""" + + +OPEN_CL = r""" +opencl_api$target:::, opencl_cpu$target::: +{ + printf( + "%x\tocl\t%x\t%x\t%s\t%s\t%s\n", machtimestamp, pid, tid, probemod, probefunc, probename + ); +} +""" + +# FIXME: extract Interrupt handling and do conditional +fbt_dtrace_hooks = [r""" + +//void kernel_debug_enter(uint32_t coreid, uint32_t debugid, uint64_t timestamp, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t threadid); +fbt::kernel_debug_enter:entry +{ + printf( + "%x\tkd\t%x\t%x\t%s\t%x\t%x\t%x\t%x\t%x\t%x\t%x\t%x\n", machtimestamp, pid, tid, probefunc, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 + ); +} + +""", r""" + +fbt::*debugid*enabled*:entry +{ + printf( + "%x\tkd\t%x\t%x\t%s\t%x\t%x\t%x\t%x\t%x\t%x\n", machtimestamp, pid, tid, probefunc, arg0, arg1, arg2, arg3, arg4, arg5 + ); +} + + +""" if not "DISABLED XXX" else '', r""" +fbt::*dtSEAHookScope*:entry /*{KMD_STACKS}*/ +{ + printf( + "%x\te\t%x\t%x\t%s_%s\t%s\t%d\n", + machtimestamp, pid, tid, stringof(probemod), stringof(arg1), stringof(arg2), arg0 + ); +} +""", r""" +fbt::*dtSEAHookEndScope*:entry +{ + printf( + "%x\tr\t%x\t%x\t%s_%s\t%s\n", + machtimestamp, pid, tid, stringof(probemod), stringof(arg0), stringof(arg1) + ); +} + +fbt::*dtSEAHookArgStr*:entry +{ + printf( + "%x\targ\t%x\t%x\t%s\t%s\n", + machtimestamp, pid, tid, stringof(arg0), stringof(arg1) + ); +} + +fbt::*dtSEAHookArgInt*:entry +{ + printf( + "%x\targ\t%x\t%x\t%s\t%d\n", + machtimestamp, pid, tid, stringof(arg0), arg1 + ); +} + +fbt::*dtSEAHookArgBlobStart*:entry +{ + printf( + "%x\tbs\t%x\t%x\t%x\t%s\t%d\n", + machtimestamp, pid, tid, arg1, stringof(arg1), arg0 + ); + trace(stringof(arg0)); + tracemem(arg0, 10); +} + +fbt::*dtSEAHookArgBlob1024*:entry +{ + printf( + "%x\tblb\t%x\t%x\n", + machtimestamp, pid, tid + ); + tracemem(arg0, 1024); +} + +fbt::*dtSEAHookArgBlobEnd*:entry +{ + printf( + "%x\tbe\t%x\t%x\n", + machtimestamp, pid, tid + ); +} + +""" if not "DISABLED XXX" else '', r""" + +/* + Interrupt handling. 
+ The list of interrupts was obtained by running 'dtrace -l | grep handleInterrupt' +*/ +fbt:com.apple.driver.AppleAPIC:_ZN28AppleAPICInterruptController15handleInterruptEPvP9IOServicei:entry, +fbt:com.apple.iokit.IOPCIFamily:_ZN8AppleVTD15handleInterruptEP22IOInterruptEventSourcei:entry, +fbt:com.apple.iokit.IOPCIFamily:_ZN32IOPCIMessagedInterruptController15handleInterruptEPvP9IOServicei:entry, +fbt:com.apple.driver.AppleSMBusController:_ZN23AppleSMBusControllerMCP15handleInterruptEP22IOInterruptEventSourcei:entry, +fbt:com.apple.driver.AppleThunderboltNHI:_ZN19AppleThunderboltNHI15handleInterruptEv:entry +{ + printf("%x\tie\t%x\t%x\t%s\t%x\t%s\t%s\n", machtimestamp, + curpsinfo->pr_pid, curlwpsinfo->pr_lwpid, + execname, cpu, probemod, probefunc + ); +} + +fbt:com.apple.driver.AppleAPIC:_ZN28AppleAPICInterruptController15handleInterruptEPvP9IOServicei:return, +fbt:com.apple.iokit.IOPCIFamily:_ZN8AppleVTD15handleInterruptEP22IOInterruptEventSourcei:return, +fbt:com.apple.iokit.IOPCIFamily:_ZN32IOPCIMessagedInterruptController15handleInterruptEPvP9IOServicei:return, +fbt:com.apple.driver.AppleSMBusController:_ZN23AppleSMBusControllerMCP15handleInterruptEP22IOInterruptEventSourcei:return, +fbt:com.apple.driver.AppleThunderboltNHI:_ZN19AppleThunderboltNHI15handleInterruptEv:return +{ + printf("%x\tir\t%x\t%x\t%s\t%x\t%s\t%s\n", machtimestamp, + curpsinfo->pr_pid, curlwpsinfo->pr_lwpid, + execname, cpu, probemod, probefunc + ); +} + +"""] + + +STACKS = { + +'UMD': r""" +{ + printf("%x\tustack\t%x\t%x:", machtimestamp, pid, tid); + ustack(); + printf("\n"); +} +""", + +'KMD': r""" +{ + printf("%x\tkstack\t%x\t%x:", machtimestamp, pid, tid); + stack(); + printf("\n"); +} +""" +} + + +def mach_absolute_time(static={}): + if not static: + import ctypes + libc = ctypes.CDLL('libc.dylib', use_errno=True) + static['mach_absolute_time'] = libc.mach_absolute_time + static['mach_absolute_time'].restype = ctypes.c_uint64 + return static['mach_absolute_time']() + + +class FifoReader(threading.Thread): + def __init__(self, collector, path): + threading.Thread.__init__(self) + self.collector = collector + self.pipe = path + if os.path.exists(self.pipe): + os.remove(self.pipe) + os.mkfifo(self.pipe) + self.file = os.open(self.pipe, os.O_RDWR) + + def run(self): + print('Started reading', self.pipe) + while self.file: + chunks = os.read(self.file, 1024).strip() + for chunk in chunks.split('\n'): + if chunk != 'close': + self.collector.attach(int(chunk)) + print('Read:', chunk) + print('Stopped reading', self.pipe) + + def stop(self): + os.write(self.file, 'close\n') + os.close(self.file) + self.file = None + os.unlink(self.pipe) + + +class DTraceCollector(Collector): + class Subcollector: + @staticmethod + def get_hooks(args): + return None + + @staticmethod + def collect(collector, on): + pass + + def __init__(self, args): + Collector.__init__(self, args) + + self.processes = {} + + self.files = [] + self.subcollectors = set() + + self.attached = set() + + if 'SUDO_ASKPASS' not in os.environ: + get_original_env()['SUDO_ASKPASS'] = self.create_ask_pass() + assert 'DYLD_INSERT_LIBRARIES' not in os.environ + + self.sudo_execute('pkill dtrace') + self.script = None + self.prepare() + self.times = [] + self.attach_by_pid = True + + @staticmethod + def create_ask_pass(): + path = '/tmp/osxaskpass.sh' + if os.path.exists(path): + return path + with open(path, 'w') as file: + file.write(osxaskpass) + os.chmod(path, 0o700) + return path + + @staticmethod + def gen_options(options): + return '\n'.join('#pragma D 
option %s=%s' % (key, str(value)) for key, value in options) + '\n' + + def prepare(self): + self.files = [os.path.join(self.args.output, 'data-%s.dtrace' % (self.args.cuts[0] if self.args.cuts else '0'))] + assert not os.path.exists(self.files[0]) # TODO: remove if not asserts, or return back: was if ... os.remove(self.files[0]) + + dtrace_script = [DSCRIPT_HEADER] + options = [ + ('bufresize', 'auto'), + ] + if self.args.ring: # https://docs.oracle.com/cd/E19253-01/817-6223/chp-buf/index.html + options += [ + ('bufpolicy', 'ring'), + ('bufsize', '64m') # 64 is maximum, system crashes on any bigger, even 65m + ] + else: + options += [ + ('switchrate', '10hz'), # print at 10Hz (instead of 1Hz) - brendangregg + ('bufsize', '4g') + ] + + dtrace_script.append(self.gen_options(options)) + + if is_domain_enabled('context_switches'): + dtrace_script.append(dtrace_context_switch) + + if is_domain_enabled('fbt_hooks'): + dtrace_script += fbt_dtrace_hooks + + for subcollector in self.subcollectors: + hooks = subcollector.get_hooks(self.args) # support multi pid for subcollectors + if hooks: + dtrace_script += hooks + subcollector.collect(self, True) + + self.script = dtrace_script + + def sudo_execute(self, cmd): + return self.execute('sudo -E -A ' + cmd) + + def prepare_per_pid(self): + dtrace_script = [] + if is_domain_enabled('pid_hooks'): + dtrace_script += pid_dtrace_hooks # TODO: add %app_name% hotspots unconditionally + + if is_domain_enabled('instrument_target'): + if self.args.victim: + mod_name = os.path.basename(self.args.victim) + elif self.args.target: + (out, err) = DTraceCollector.execute('ps -p %d -o args' % self.args.target, log=False) + if not err: + lines = out.strip().split('\n') + if len(lines) > 1: + executable = lines[1].split()[0] + mod_name = executable.split('/')[-1] + + print('Auto-instrumented module:', mod_name) + dtrace_script += bracket_hook(mod_name.replace(' ', '*')) + + if is_domain_enabled('opencl'): + dtrace_script.append(OPEN_CL) + + return dtrace_script + + def patch_per_pid(self, pids, items): + result = [] + for item in items: + if '$target:' in item: + for pid in pids: + result.append(item.replace('$target:', '%s:' % str(pid))) + else: + result.append(item) + + return result + + def get_cmd(self, out, script, pids=[]): + # -C Run the C preprocessor + # -Z Permit probe descriptions that match zero probes + # -w Permit destructive actions in D programs + cmd = 'sudo -E -A dtrace -C -Z -w -o "%s" -s "%s"' % (out, script) # FIXME: sudo_execute + if self.args.verbose != 'info': + cmd += ' -q' # Set quiet mode + else: + # -S Show D compiler intermediate code + # -v Print an interface stability report + # -V Report the highest D programming interface version + # -e Exit after compiling + # -l List probes instead of enabling them + cmd += ' ' + + for pid in pids: + cmd += " -p %s" % pid + return cmd + + def launch_victim(self, victim, env): + proc = self.run_dtrace(victim=victim, env=env) + if not proc: + return None + + class PopenWrapper: + def __init__(self, parent, victim): + self.parent = parent + cmd = 'pgrep -n "%s"' % os.path.basename(victim[0]) + while True: + data, err = parent.execute(cmd) + if data: + self.pid = int(data) + break + time.sleep(1) + + def send_signal(self, sig): + self.parent.sudo_execute('kill -%d %d' % (sig, self.pid)) + + def wait(self, sec=10): + proc['proc'].wait() + """ XXX + for x in range(0, sec): + proc['proc'].poll() + time.sleep(1) + """ + return proc['proc'].returncode + + def communicate(self): + self.wait() + return None, 
None + + return PopenWrapper(self, victim) + + def run_dtrace(self, attach_by_name=False, victim=None, env=None): + self.attach_by_pid = False + # spawn dtrace tracers and exit, all means to stop it must be saved to self members: + # launch command line with dtrace script and remember pid + script = os.path.join(self.args.output, 'script.d') + cmd = self.get_cmd(self.files[0], script) + dtrace_script = self.script[:] + + hooks = [] + dtrace_script += hooks + # The target is known only when start is called, so doing part of preparation here + if attach_by_name: + dtrace_script += self.prepare_per_pid() + cmd += " -W %s" % os.path.basename(self.args.victim) + elif victim: + dtrace_script += self.prepare_per_pid() + cmd += ' -c "%s"' % ' '.join(victim) + else: + assert not any('$target' in item for item in dtrace_script) + pids = self.args.target if isinstance(self.args.target, list) else [self.args.target] + for pid in pids: + cmd += " -p %s" % pid + print("Attaching PIDs:", pids) + items = self.prepare_per_pid() + dtrace_script += self.patch_per_pid(pids, items) + for pid in pids: + if self.args.stacks: + dtrace_script.append(OFF_CPU_STACKS.replace('$target', str(pid))) + if is_domain_enabled('wakeups'): + dtrace_script.append(dtrace_wakeup.replace('$target', str(pid))) + for hook in hooks: + dtrace_script.append(hook.replace('/*{CONDITIONS}*/', '/pid == %s/' % str(pid))) + + if self.args.stacks: + dtrace_script = self.patch_stacks(pids, dtrace_script) + + # remove duplicates from the list: + dtrace_script = [item for n, item in enumerate(dtrace_script) if item not in dtrace_script[:n]] + dtrace_script = '\n'.join(dtrace_script) + + with open(script, 'w') as file: + file.write(dtrace_script) + + return self.run_parallel(cmd, env) + + def start(self): # FIXME: see man dtrace -W option for proper attach + self.times.append(datetime.now()) + if self.attach_by_pid: + self.run_dtrace() + + def run_parallel(self, cmd, env=None): + self.log(cmd) + proc = self.processes.setdefault(cmd, {}) + proc['proc'] = subprocess.Popen(cmd, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env = env or os.environ) + proc['pid'] = proc['proc'].pid + self.log("%s -> pid: %d" % (cmd, proc['proc'].pid)) + return proc + + @staticmethod + def patch_stacks(pids, items): + reg_exp = re.compile(r"""(.*)\/\*\{(.*)_STACKS\}\*\/.*""", re.IGNORECASE | re.DOTALL) + result = [] + for item in items: + result.append(item) + if '_STACKS}*/' in item: + lines = item.strip().split('\n') + assert lines[-1].strip().endswith('}') + res = reg_exp.search(lines[0]) + what, where = res.groups() + condition = '/$target == pid/' if pids else '' + code = '\n%s %s %s' % (what, condition, STACKS[where]) + if pids: + for pid in pids: + result.append(code.replace('$target', str(pid))) + else: + result.append(code) + return result + + def attach(self, pid): + if pid in self.attached: + return + pids = [pid] + list(self.get_pid_children(pid)) + self.attached |= set(pids) + items = self.prepare_per_pid() + dtrace_script = [DSCRIPT_HEADER] + [item for item in self.script if '$target' in item] + dtrace_script += self.patch_per_pid(pids, items) + script = os.path.join(self.args.output, 'script_%d.d' % pid) + dtrace_script = '\n'.join(dtrace_script) + self.files.append(os.path.join(self.args.output, 'data-%d-%s.dtrace' % (pid, self.args.cuts[0] if self.args.cuts else '0'))) + cmd = self.get_cmd(self.files[-1], script, pids) + with open(script, 'w') as file: + file.write(dtrace_script) + self.run_parallel(cmd) + + @staticmethod + 
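+    # Yields the PIDs of direct children of 'parent' by parsing 'ps -o pid,ppid -ax' output.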
def get_pid_children(parent): + (out, err) = DTraceCollector.execute('ps -o pid,ppid -ax', log=False) + if err: + print(err) + return + for line in out.split('\n'): + if not line: + continue + parts = line.split() + if len(parts) != 2: + continue + pid, ppid = line.split() + if str(parent) == ppid: + yield int(pid) + + @staticmethod + def locate(what, statics={}): + try: + if not statics: + res = subprocess.check_output(['locate', '-S']).decode("utf-8") + statics['locate'] = 'WARNING' not in res + if not statics['locate']: + print(res) + if statics['locate']: + return subprocess.check_output(['locate', what]).decode("utf-8").split('\n') + except Exception: + pass + return [] + + def collect_codes(self): + items = self.locate("*.codes") + files = [item.strip() for item in items if not item.startswith('/Volumes')] + + items = self.locate("kdebug.h") + files += [item.strip() for item in items if not item.startswith('/Volumes')] + + filtered = {} + for file in files: + if not file or not os.path.exists(file): + continue + name = os.path.basename(file) + size = os.path.getsize(file) + if size and (name not in filtered or os.path.getsize(filtered[name]) < size): + filtered[name] = file + for file in filtered.values(): + shutil.copy(file, self.args.output) + + items = self.locate("IntelGPUSignposts.plist") + plists = [] + for line in items: + line = line.strip() + if line: + + plists.append((os.path.getmtime(line), line)) # finding newest + if plists: + plist = sorted(plists)[-1][1] + shutil.copy(plist, self.args.output) + + def collect_system_info(self): + with open(os.path.join(self.args.output, 'sysinfo.txt'), 'w') as file: + (probes, err) = self.execute('sysctl -a', stdout=file) + + def stop(self, wait=True): + for name, data in self.processes.items(): + print('Stopping:', name) + pids = [data['pid']] + list(self.get_pid_children(data['pid'])) + for pid in pids: + self.sudo_execute("kill -2 %d" % pid) + for pid in pids: + try: + os.waitpid(pid, 0) + except: + pass + + if not data['proc']: + continue + out, err = data['proc'].communicate() + message(None, "\n\n -= Target %s output =- {\n" % name) + if out: + self.log("Trace %s out:\n%s" % (name, out.decode())) + message(None, out.strip()) + message(None, "-" * 50) + if err: + self.log("Trace %s err:\n%s" % (name, err.decode()), True) + message(None, err.strip()) + message(None, "}\n\n") + + if data['proc'].returncode != 0: + message('error', '%s(%d) has exited with error code %d check logs for details' % (name, data['pid'], data['proc'].returncode)) + + for subcollector in self.subcollectors: + print('Stopping:', subcollector) + subcollector.collect(self, False) + + self.times.append(datetime.now()) + + sys_log = os.path.join(self.args.output, 'sys_log.json') + with open(sys_log, 'w') as file: + cmd = 'log show --source --style json --debug --signpost' # --last 1m --start, --end 'YYYY-MM-DD HH:MM:SS' + cmd += self.times[1].strftime(" --end '%Y-%m-%d %H:%M:%S'") + if self.args.ring: + cmd += (self.times[1] - timedelta(seconds=self.args.ring)).strftime(" --start '%Y-%m-%d %H:%M:%S'") + else: + cmd += self.times[0].strftime(" --start '%Y-%m-%d %H:%M:%S'") + self.execute(cmd, stdout=file) # FIXME: get time of collection or ring size + + self.collect_system_info() + self.collect_codes() + + res = self.files + [sys_log] + + return res + + @classmethod + def available(cls): + if 'darwin' not in sys.platform: + return False + (out, err) = cls.execute('csrutil status') + if 'disabled' not in out: + print('Please do: "csrutil disable" from Recovery OS 
terminal to be able using dtrace...') + return False + return True + + +COLLECTOR_DESCRIPTORS = [{ + 'format': 'dtrace', + 'available': DTraceCollector.available(), + 'collector': DTraceCollector +}] + +if __name__ == "__main__": + print(mach_absolute_time()) + DTraceCollector.check_graphics_firmware('com.apple.driver.AppleIntelSKLGraphics') + diff --git a/thirdparty/itt_collector/runtool/collectors/win.py b/thirdparty/itt_collector/runtool/collectors/win.py new file mode 100644 index 00000000000000..bc77b1d4ca9962 --- /dev/null +++ b/thirdparty/itt_collector/runtool/collectors/win.py @@ -0,0 +1,313 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +from __future__ import print_function +import os +import sys +import time +from datetime import datetime +import shutil +import tempfile +import platform +import traceback +import subprocess +sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))) +from sea_runtool import Collector, is_domain_enabled +import sea + + +def async_exec(cmd, title=None, env=None): + cmd = 'start "%s" /MIN /LOW %s' % (title if title else cmd, cmd) + subprocess.Popen(cmd, shell=True, stdin=None, stdout=None, stderr=None, creationflags=0x00000008, env=env) # DETACHED_PROCESS + + +class WPRCollector(Collector): + def __init__(self, args): + Collector.__init__(self, args) + self.wpr = self.detect() + self.started = False + if self.args.cuts: + self.file = os.path.join(args.output, "wpa-%s.etl" % (self.args.cuts[0] if self.args.cuts else '0')) + else: + self.file = os.path.join(args.output, "wpa.etl") + + @classmethod + def detect(cls, statics={}): + if 'res' in statics: + return statics['res'] + wprs = cls.detect_instances('wpr') + res = [] + for wpr in wprs: + proc = subprocess.Popen('"%s" /?' 
% wpr, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (out, err) = proc.communicate() + out = out.decode() + if err: + return None + for line in out.split('\n'): + pos = line.find('Version') + if -1 != pos: + version = line[pos + len('Version '):].strip() + if int(version.split('.')[0]) >= 10: + res.append((wpr, version.split()[0])) + break + if not res: + return None + statics['res'] = sorted(res, key=lambda __ver: [int(item) for item in __ver[1].split('.')], reverse=True)[0][0] + return statics['res'] + + @staticmethod + def get_options(): + wpr = WPRCollector.detect() + if not wpr: + return + proc = subprocess.Popen('"%s" -profiles' % wpr, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (out, err) = proc.communicate() + if err: + return + for line in out.split('\n'): + if not line.startswith('\t'): + continue + parts = line.strip().split() + yield parts[0], parts[0] in ['DiskIO', 'FileIO', 'GPU', 'GeneralProfile', 'Handle', 'Heap', 'Network', 'Power', 'Video', 'VirtualAllocation'] + + def start(self): + if not self.wpr: + print("Failed to start without WPR...") + return + if self.is_recording(): + self.cancel() + profile = os.path.normpath(os.path.join(self.args.bindir, '..', 'ETW', 'IntelSEAPI.wprp')) + profiles = ['-start %s' % option for option, _ in WPRCollector.get_options() if is_domain_enabled('wpa.' + option)] + cmd = '"%s" -start "%s" %s %s' % (self.wpr, profile, ' '.join(profiles), ('' if self.args.ring else '-filemode')) + (out, err) = self.execute(cmd) + if err: + return + self.started = True + return self + + def cancel(self): + return self.execute('"%s" -cancel' % self.wpr) + + @classmethod + def is_recording(cls, statics={}): + if not statics: + statics['wpr'] = cls.detect() + statics['xperf'] = os.path.normpath(os.path.join(os.path.dirname(statics['wpr']), 'xperf.exe')) + if os.path.exists(statics['xperf']): + (out, err) = cls.execute('"%s" -Loggers | find "WPR_"' % statics['xperf']) + return any('WPR_' in line for line in out.split('\n')) + else: + (out, err) = cls.execute('"%s" -status' % statics['wpr']) + return err or not any('WPR is not recording' in line for line in out.split('\n')) + + def stop(self, wait=True): + if not self.started: + return [] + + self.log("Stop wait=%s" % str(wait)) + if not wait: + cmd = 'start "WPR stop" /MIN /LOW "%s" "%s" wpa "%s" "%s"' % (sys.executable, os.path.realpath(__file__), self.file, self.args.output) + self.log(cmd) + subprocess.Popen(cmd, shell=False, stdin=None, stdout=None, stderr=None, creationflags=0x00000008, env=sea.prepare_environ(self.args)) # DETACHED_PROCESS + while self.is_recording(): + self.log("is_recording") + time.sleep(1) + return [self.file] + else: + env = sea.prepare_environ(self.args) + self.stop_wpr(self.wpr, self.file, self.args.output, env) + return [self.file] + + @classmethod + def stop_wpr(cls, wpr, file, output, env=None): + (out, err) = cls.execute('"%s" -stop "%s"' % (wpr, file), env=env) + if err: + return [] + assert(file in out) + + @classmethod + def launch(cls, args): + cls.stop_wpr(cls.detect(), args[0], args[1]) + + +class ETWTrace(Collector): + def __init__(self, args): + Collector.__init__(self, args) + wpr = WPRCollector.detect() + self.xperf = os.path.normpath(os.path.join(os.path.dirname(wpr), 'xperf')) if wpr else None + if not self.xperf or not os.path.exists(self.xperf): + variants = self.detect_instances('xperf') + if variants: + self.xperf = variants[0] # TODO: select by higher version + else: + self.xperf = None + self.files = [] + self.start() + + 
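+    # start() below builds the trace sessions from the enabled domains: it writes a logman
+    # provider file for the user-mode ETW providers and starts a "GPA_SEA" session, then starts
+    # a kernel trace via xperf when available (falling back to the "NT Kernel Logger" via logman).
+    # Illustrative shape of the user-mode command it issues (exact flags depend on the arguments):
+    #   logman start GPA_SEA -ct perf -bs 1024 -nb 120 480 -pf "<tmp>\gpa_logman.pf" -o "<output>\etw.etl" -ets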
def start(self): + self.stop() + cmd = None + + if self.args.cuts: + self.files.append('%s\\etw-%s.etl' % (self.args.output, (self.args.cuts[0] if self.args.cuts else '0'))) + self.files.append('%s\\kernel-%s.etl' % (self.args.output, (self.args.cuts[0] if self.args.cuts else '0'))) + else: + self.files.append('%s\\etw.etl' % self.args.output) + self.files.append('%s\\kernel.etl' % self.args.output) + + logman_pf = os.path.join(tempfile.gettempdir(), 'gpa_logman.pf') + count = 0 + with open(logman_pf, 'w') as file: + if is_domain_enabled('Microsoft-Windows-DxgKrnl'): + file.write('"Microsoft-Windows-DxgKrnl" (Base,GPUScheduler,Profiler,Resource,References,0x4000000000000001)\n') + count += 1 + if is_domain_enabled('Microsoft-Windows-Dwm-Core'): + file.write('"Microsoft-Windows-Dwm-Core" (DetailedFrameInformation)\n') + count += 1 + if is_domain_enabled('Microsoft-Windows-DXGI'): + file.write('"Microsoft-Windows-DXGI" (Events)\n') + count += 1 + if is_domain_enabled('SteamVR'): + file.write('"{8C8F13B1-60EB-4B6A-A433-DE86104115AC}"\n') + count += 1 + if is_domain_enabled('OculusVR'): + file.write('"{553787FC-D3D7-4F5E-ACB2-1597C7209B3C}"\n') + count += 1 + if is_domain_enabled('Intel_Graphics_D3D10'): + file.write('"{AD367E62-97EF-4B20-8235-E8AB49DB0C23}"\n') + count += 1 + + if count: + cmd = 'logman start GPA_SEA -ct perf -bs 1024 -nb 120 480' + cmd += ' -pf "%s" -o "%s" %s -ets' % (logman_pf, self.files[0], (('-max %d -f bincirc' % (self.args.ring * 15)) if self.args.ring else '')) + else: + del self.files[0] + + if cmd: + (out, err) = self.execute(cmd) + if err: + return None + + if self.xperf: + time_multiplier = 0 + kernel_logger = [] # logman query providers "Windows Kernel Trace" + complimentary = '' + if is_domain_enabled('Kernel::ContextSwitches'): + time_multiplier += 10 + kernel_logger += ['PROC_THREAD', 'CSWITCH'] + if is_domain_enabled('Kernel::Stacks', self.args.stacks): + time_multiplier += 20 + kernel_logger += ['LOADER', 'PROFILE'] + complimentary += ' -stackwalk PROFILE+CSWITCH -SetProfInt 1000000' + if is_domain_enabled('Kernel::IO'): + time_multiplier += 5 + kernel_logger += ['FILE_IO', 'FILE_IO_INIT', 'DISK_IO', 'DISK_IO_INIT', 'FILENAME', 'OPTICAL_IO', 'OPTICAL_IO_INIT'] + if is_domain_enabled('Kernel::Network', False): + time_multiplier += 5 + kernel_logger += ['NETWORKTRACE'] + if is_domain_enabled('Kernel::Memory', False): + time_multiplier += 5 + kernel_logger += ['VIRT_ALLOC', 'MEMINFO', 'VAMAP', 'POOL', 'MEMINFO_WS'] # 'FOOTPRINT', 'MEMORY' + if is_domain_enabled('Kernel::PageFaults', False): + time_multiplier += 5 + kernel_logger += ['ALL_FAULTS', 'HARD_FAULTS'] + if kernel_logger: + cmd = '"%s" -on %s %s -f "%s" -ClockType PerfCounter -BufferSize 1024 -MinBuffers 120 -MaxBuffers 480' % (self.xperf, '+'.join(kernel_logger), complimentary, self.files[-1]) + if self.args.ring: + cmd += ' -MaxFile %d -FileMode Circular' % (self.args.ring * time_multiplier) # turning seconds into megabytes... 
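+                # Rough heuristic: time_multiplier scales the requested ring length (seconds)
+                # into a -MaxFile size in MB, weighted by how costly the enabled kernel providers are.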
+ (out, err) = self.execute(cmd) + if err or 'Error:' in out: + del self.files[-1] + return self + else: + del self.files[-1] + else: + time_multiplier = 0 + kernel_logger = [] # logman query providers "Windows Kernel Trace" + if is_domain_enabled('Kernel::ContextSwitches'): + time_multiplier += 10 + kernel_logger += ['process', 'thread', 'cswitch'] + if is_domain_enabled('Kernel::Stacks', self.args.stacks): + time_multiplier += 10 + kernel_logger += ['img', 'profile'] + if is_domain_enabled('Kernel::IO'): + time_multiplier += 5 + kernel_logger += ['fileio', 'disk'] + if is_domain_enabled('Kernel::Network', False): + time_multiplier += 5 + kernel_logger += ['net'] + if is_domain_enabled('Kernel::Memory', False): + time_multiplier += 5 + kernel_logger += ['virtalloc'] + if is_domain_enabled('Kernel::PageFaults', False): + time_multiplier += 5 + kernel_logger += ['pf', 'hf'] + if kernel_logger: + cmd = 'logman start "NT Kernel Logger" -p "Windows Kernel Trace" (%s) -ct perf -bs 1024 -nb 120 480' % ','.join(kernel_logger) + cmd += ' -o "%s" %s -ets' % (self.files[-1], (('-max %d -f bincirc' % (self.args.ring * time_multiplier)) if self.args.ring else '')) + (out, err) = self.execute(cmd) + if err or 'Error:' in out: + del self.files[-1] + return self + else: + del self.files[-1] + + self.files.append('%s/etw_profilers.logman' % self.args.output) + cmd = 'cmd /c logman query providers ^> "%s"' % self.files[-1] + async_exec(cmd, 'Collecting ETW providers') + + return self + + def stop(self, wait=True): # TODO: stop without waits + if self.xperf: + proc = subprocess.Popen('xperf -stop', shell=False) + if wait: + proc.wait() + else: + proc = subprocess.Popen('logman stop "NT Kernel Logger" -ets', shell=False) + if wait: + proc.wait() + proc = subprocess.Popen('logman stop "GPA_SEA" -ets', shell=False) + if wait: + proc.wait() + + return self.files + +COLLECTOR_DESCRIPTORS = [ + { + 'available': sys.platform == 'win32' and WPRCollector.detect(), + 'collector': WPRCollector, + 'format': 'wpa' + }, + { + 'available': sys.platform == 'win32', + 'collector': ETWTrace, + 'format': 'etw' + } +] + +if __name__ == "__main__": + with open(os.path.join(tempfile.gettempdir(), datetime.now().strftime('sea_%H_%M_%S__%d_%m_%Y.log')), 'a') as log: + log.write(str(sys.argv) + '\n') + try: + name = sys.argv[1] + for desc in COLLECTOR_DESCRIPTORS: + if desc['format'] == name: + cls = desc['collector'] + cls.set_output(log) + cls.launch(sys.argv[2:]) + break + except: + log.write(traceback.format_exc()) diff --git a/thirdparty/itt_collector/runtool/exporters/Stat.py b/thirdparty/itt_collector/runtool/exporters/Stat.py new file mode 100644 index 00000000000000..4d3a6cc3ac8f75 --- /dev/null +++ b/thirdparty/itt_collector/runtool/exporters/Stat.py @@ -0,0 +1,57 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +import os +import csv +import shutil +from sea_runtool import GraphCombiner + +# Supported values are "csv" and "tsv" +FILE_EXTENSION = ".csv" + +class Stat(GraphCombiner): + def __init__(self, args, tree): + GraphCombiner.__init__(self, args, tree) + + def get_targets(self): + return [self.args.output + FILE_EXTENSION] + + def finish(self): + GraphCombiner.finish(self) + delim = ',' + if FILE_EXTENSION == ".tsv": + delim = '\t' + with open(self.get_targets()[-1], 'w') as f: + writer = csv.writer(f, delimiter=delim) + writer.writerow(["domain", "name", "min", "max", "avg", "total", "count"]) + for domain, data in self.per_domain.items(): + for task_name, task_data in data['tasks'].items(): + time = task_data['time'] + writer.writerow([domain, task_name, min(time), max(time), sum(time) / len(time), sum(time), len(time)]) + + @staticmethod + def join_traces(traces, output, args): # FIXME: implement real joiner + sorting = [] + for trace in traces: + sorting.append((os.path.getsize(trace), trace)) + sorting.sort(key=lambda size_trace: size_trace[0], reverse=True) + shutil.copyfile(sorting[0][1], output + ".tsv") + return output + ".tsv" + +EXPORTER_DESCRIPTORS = [{ + 'format': 'stat', + 'available': True, + 'exporter': Stat +}] diff --git a/thirdparty/itt_collector/runtool/python_compat.py b/thirdparty/itt_collector/runtool/python_compat.py new file mode 100644 index 00000000000000..35440cc74c5a23 --- /dev/null +++ b/thirdparty/itt_collector/runtool/python_compat.py @@ -0,0 +1,41 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +import sys + +if sys.version_info[0] > 2: + import queue as queue + basestring = (str, bytes) + unicode = str + + def func_name(func_object): + return func_object.__name__ + def func_globals(func_object): + return func_object.__globals__ + def func_code(func_object): + return func_object.__code__ + + raw_input = input +else: + import Queue as queue + basestring = basestring + unicode = unicode + + def func_name(func_object): + return func_object.func_name + def func_globals(func_object): + return func_object.func_globals + def func_code(func_object): + return func_object.func_code diff --git a/thirdparty/itt_collector/runtool/sea.py b/thirdparty/itt_collector/runtool/sea.py new file mode 100644 index 00000000000000..668e75eb096f9d --- /dev/null +++ b/thirdparty/itt_collector/runtool/sea.py @@ -0,0 +1,303 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +from __future__ import print_function +import os +import sys +import json +import time +import platform +import threading +from ctypes import cdll, c_char_p, c_void_p, c_ulonglong, c_int, c_double, c_long, c_bool, c_short, c_wchar_p, c_uint32, POINTER, CFUNCTYPE +from sea_runtool import reset_global, global_storage + + +class Dummy: + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + +class Task: + def __init__(self, itt, name, id, parent): + self.itt = itt + self.name = name + self.id = id + self.parent = parent + + def __enter__(self): + self.itt.lib.itt_task_begin(self.itt.domain, self.id, self.parent, self.itt.get_string_id(self.name), 0) + return self + + def arg(self, name, value): + assert self.id + try: + value = float(value) + self.itt.lib.itt_metadata_add(self.itt.domain, self.id, self.itt.get_string_id(name), value) + except ValueError: + self.itt.lib.itt_metadata_add_str(self.itt.domain, self.id, self.itt.get_string_id(name), str(value)) + return self + + def blob(self, name, pointer, size): + assert self.id + self.itt.lib.itt_metadata_add_blob(self.itt.domain, self.id, self.itt.get_string_id(name), pointer, size) + return self + + def __exit__(self, type, value, traceback): + self.itt.lib.itt_task_end(self.itt.domain, 0) + return False + + +class Track: + def __init__(self, itt, track): + self.itt = itt + self.track = track + + def __enter__(self): + self.itt.lib.itt_set_track(self.track) + return self + + def __exit__(self, type, value, traceback): + self.itt.lib.itt_set_track(None) + return False + + +def prepare_environ(args): + if 'sea_env' in global_storage(None): + return global_storage('sea_env') + env = os.environ.copy() + if args.verbose == 'info': + env['INTEL_SEA_VERBOSE'] = '1' + bitness = '32' if '32' in platform.architecture()[0] else '64' + env_name = 'INTEL_LIBITTNOTIFY' + bitness + if env_name not in env or 'SEAPI' not in env[env_name]: + if sys.platform == 'win32': + dl_name = 'IntelSEAPI.dll' + elif sys.platform == 'darwin': + dl_name = 'libIntelSEAPI.dylib' + else: + dl_name = 'libIntelSEAPI.so' + + env[env_name] = os.path.join(args.bindir, dl_name) + if args.bindir not in env['PATH']: + env['PATH'] += os.pathsep + args.bindir + + reset_global('sea_env', env) + return global_storage('sea_env') + + +class ITT(object): + scope_global = 1 + scope_process = 2 + scope_thread = 3 + scope_task = 4 + + _instance = None + + def __new__(cls, *args, **kwargs): + if not cls._instance: + cls._instance = super(ITT, cls).__new__(cls) + return cls._instance + + def __init__(self, domain): + if hasattr(self, 'lib'): + return + bitness = 32 if '32' in platform.architecture()[0] else 64 + env_name = 'INTEL_LIBITTNOTIFY' + str(bitness) + self.lib = None + self.strings = {} + self.tracks = {} + self.counters = {} + env = global_storage('sea_env') + if env_name not in env: + print("Warning:", env_name, "is not set...") + return + if os.path.exists(env[env_name]): + self.lib = cdll.LoadLibrary(env[env_name]) + if not self.lib: + print("Warning: Failed to load", env[env_name], "...") + return + + # void* itt_create_domain(const char* str) + self.lib.itt_create_domain.argtypes = [c_char_p] + self.lib.itt_create_domain.restype = c_void_p + + # void* itt_create_string(const char* str) + self.lib.itt_create_string.argtypes = [c_char_p] + self.lib.itt_create_string.restype = c_void_p + + # void itt_marker(void* domain, uint64_t id, void* name, int scope) + self.lib.itt_marker.argtypes = [c_void_p, c_ulonglong, c_void_p, c_int, c_ulonglong] + + # 
void itt_task_begin(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) + self.lib.itt_task_begin.argtypes = [c_void_p, c_ulonglong, c_ulonglong, c_void_p, c_ulonglong] + + # void itt_task_begin_overlapped(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) + self.lib.itt_task_begin_overlapped.argtypes = [c_void_p, c_ulonglong, c_ulonglong, c_void_p, c_ulonglong] + + # void itt_metadata_add(void* domain, uint64_t id, void* name, double value) + self.lib.itt_metadata_add.argtypes = [c_void_p, c_ulonglong, c_void_p, c_double] + + # void itt_metadata_add_str(void* domain, uint64_t id, void* name, const char* value) + self.lib.itt_metadata_add_str.argtypes = [c_void_p, c_ulonglong, c_void_p, c_char_p] + + # void itt_metadata_add_blob(void* domain, uint64_t id, void* name, const void* value, uint32_t size) + self.lib.itt_metadata_add_blob.argtypes = [c_void_p, c_ulonglong, c_void_p, c_void_p, c_uint32] + + # void itt_task_end(void* domain, uint64_t timestamp) + self.lib.itt_task_end.argtypes = [c_void_p, c_ulonglong] + + # void itt_task_end_overlapped(void* domain, uint64_t timestamp, uint64_t taskid) + self.lib.itt_task_end_overlapped.argtypes = [c_void_p, c_ulonglong, c_ulonglong] + + # void* itt_counter_create(void* domain, void* name) + self.lib.itt_counter_create.argtypes = [c_void_p, c_void_p] + self.lib.itt_counter_create.restype = c_void_p + + # void itt_set_counter(void* id, double value, uint64_t timestamp) + self.lib.itt_set_counter.argtypes = [c_void_p, c_double, c_ulonglong] + + # void* itt_create_track(const char* group, const char* track) + self.lib.itt_create_track.argtypes = [c_char_p, c_char_p] + self.lib.itt_create_track.restype = c_void_p + + # void itt_set_track(void* track) + self.lib.itt_set_track.argtypes = [c_void_p] + + # uint64_t itt_get_timestamp() + self.lib.itt_get_timestamp.restype = c_ulonglong + + if hasattr(self.lib, 'get_gpa_version'): + # const char* get_gpa_version() + self.lib.get_gpa_version.restype = c_char_p + + if sys.platform == 'win32': + # const char* resolve_pointer(const char* szModulePath, uint64_t addr) + self.lib.resolve_pointer.argtypes = [c_char_p, c_ulonglong] + self.lib.resolve_pointer.restype = c_char_p + + # bool ExportExeIconAsGif(LPCWSTR szExePath, LPCWSTR szGifPath) + if hasattr(self.lib, 'ExportExeIconAsGif'): + self.lib.ExportExeIconAsGif.argtypes = [c_wchar_p, c_wchar_p] + self.lib.ExportExeIconAsGif.restype = c_bool + + # bool ConvertToGif(LPCWSTR szImagePath, LPCWSTR szGifPath, long width, long height) + self.lib.ConvertToGif.argtypes = [c_wchar_p, c_wchar_p, c_long, c_long] + self.lib.ConvertToGif.restype = c_bool + + elif 'linux' in sys.platform: + # void itt_write_time_sync_markers() + self.lib.itt_write_time_sync_markers.argtypes = [] + + # typedef bool (*receive_t)(void* pReceiver, uint64_t time, uint16_t count, const wchar_t** names, const wchar_t** values, double progress); + self.receive_t = CFUNCTYPE(c_bool, c_ulonglong, c_ulonglong, c_short, POINTER(c_wchar_p), POINTER(c_wchar_p), c_double) + # typedef void* (*get_receiver_t)(const wchar_t* provider, const wchar_t* opcode, const wchar_t* taskName); + self.get_receiver_t = CFUNCTYPE(c_ulonglong, c_wchar_p, c_wchar_p, c_wchar_p) + if hasattr(self.lib, 'parse_standard_source'): + # bool parse_standard_source(const char* file, get_receiver_t get_receiver, receive_t receive) + self.lib.parse_standard_source.argtypes = [c_char_p, self.get_receiver_t, self.receive_t] + self.lib.parse_standard_source.restype = c_bool + + self.domain 
= self.lib.itt_create_domain(domain.encode()) + + def get_string_id(self, text): + try: + return self.strings[text] + except: + id = self.strings[text] = self.lib.itt_create_string(bytes(text, encoding='utf-8')) + return id + + def marker(self, text, scope=scope_process, timestamp=0, id=0): + if not self.lib: + return + self.lib.itt_marker(self.domain, id, self.get_string_id(text), scope, timestamp) + + def task(self, name, id=0, parent=0): + if not self.lib: + return Dummy() + return Task(self, name, id, parent) + + def task_submit(self, name, timestamp, dur, id=0, parent=0): + self.lib.itt_task_begin(self.domain, id, parent, self.get_string_id(name), timestamp) + self.lib.itt_task_end(self.domain, timestamp + dur) + + def counter(self, name, value, timestamp=0): + if not self.lib: + return + try: + counter = self.counters[name] + except: + counter = self.counters[name] = self.lib.itt_counter_create(self.domain, self.get_string_id(name)) + self.lib.itt_set_counter(counter, value, timestamp) + + def track(self, group, name): + if not self.lib: + return Dummy() + key = group + "/" + name + try: + track = self.tracks[key] + except: + track = self.tracks[key] = self.lib.itt_create_track(group, name) + return Track(self, track) + + def get_timestamp(self): + if not self.lib: + return 0 + return self.lib.itt_get_timestamp() + + def resolve_pointer(self, module, addr): + if sys.platform == 'win32': + if not self.lib: + return + return self.lib.resolve_pointer(module, addr) + + def time_sync(self): + if not self.lib: + return + self.lib.itt_write_time_sync_markers() + + def parse_standard_source(self, path, reader): + if not hasattr(self.lib, 'parse_standard_source'): + return None + receivers = [] + + def receive(receiver, time, count, names, values, progress): # typedef bool (*receive_t)(void* receiver, uint64_t time, uint16_t count, const wchar_t** names, const wchar_t** values, double progress); + receiver = receivers[receiver - 1] # Should be: receiver = cast(receiver, POINTER(py_object)).contents.value, but it doesn't work so we use index of the array + args = {} + for i in range(0, count): + args[names[i]] = values[i] + reader.set_progress(progress) + receiver.receive(time, args) + return True + + def get_receiver(provider, opcode, taskName): # typedef void* (*get_receiver_t)(const wchar_t* provider, const wchar_t* opcode, const wchar_t* taskName); + receiver = reader.get_receiver(provider, opcode, taskName) + if not receiver: + return 0 + receivers.append(receiver) + return len(receivers) # Should be: cast(pointer(py_object(receiver)), c_void_p).value, but it doesn't work, so we return index of the array + + return self.lib.parse_standard_source(bytes(path, encoding='utf-8'), self.get_receiver_t(get_receiver), self.receive_t(receive)) + + def can_parse_standard_source(self): + return hasattr(self.lib, 'parse_standard_source') + + def get_gpa_version(self): + if not self.lib: + return "" + return self.lib.get_gpa_version() diff --git a/thirdparty/itt_collector/runtool/sea_runtool.py b/thirdparty/itt_collector/runtool/sea_runtool.py new file mode 100644 index 00000000000000..e694947e0933ae --- /dev/null +++ b/thirdparty/itt_collector/runtool/sea_runtool.py @@ -0,0 +1,2313 @@ +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +from __future__ import print_function +import os +import imp +import sys +import sea +import copy +import time +import shutil +import struct +import signal +import fnmatch +import tempfile +import binascii +import platform +import traceback +import threading +import subprocess +from python_compat import * +from glob import glob +from datetime import datetime, timedelta + +sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), 'decoders'))) + +try: + sys.setdefaultencoding("utf-8") +except: + pass + +ProgressConst = 20000 + +TIME_SHIFT_FOR_GT = 1000 +# on OSX an Application launched from Launchpad has nothing in PATH +if sys.platform == 'darwin': + if '/usr/bin' not in os.environ['PATH']: + os.environ['PATH'] += os.pathsep + '/usr/bin' + if '/usr/sbin' not in os.environ['PATH']: + os.environ['PATH'] += os.pathsep + '/usr/sbin' + + +def global_storage(name, default={}): + if isinstance(__builtins__, dict): + seapi = __builtins__.setdefault('SEAPI', {}) + else: # pypy + if not hasattr(__builtins__, 'SEAPI'): + setattr(__builtins__, 'SEAPI', {}) + seapi = getattr(__builtins__, 'SEAPI', None) + return seapi.setdefault(name, copy.deepcopy(default)) if name else seapi # FIXME put copy.deepcopy under condition + + +def reset_global(name, value): + global_storage(None)[name] = value + + +def format_bytes(num): + for unit in ['', 'K', 'M', 'G']: + if abs(num) < 1024.0: + return "%3.1f %sB" % (num, unit) + num /= 1024.0 + return str(num) + 'B' + + +class DummyWith(): # for conditional with statements + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + return False + + +class Profiler(): + def __enter__(self): + try: + import cProfile as profile + except: + import profile + self.profiler = profile.Profile() + self.profiler.enable() + return self + + def __exit__(self, type, value, traceback): + self.profiler.disable() + self.profiler.print_stats('time') + return False + + +def get_extensions(name, multiple=False): + big_name = (name + 's').upper() + this_module = sys.modules[__name__] + if big_name in dir(this_module): + return 
getattr(this_module, big_name) + extensions = {} + root = os.path.join(os.path.dirname(os.path.realpath(__file__)), name + 's') + for extension in glob(os.path.join(root, '*.py')): + module_name = name + '.' + os.path.splitext(os.path.basename(extension))[0] + if name not in sys.modules: + sys.modules[name] = imp.new_module(name) + if module_name in sys.modules: + module = sys.modules[module_name] + else: + module = imp.load_source(module_name, extension) + for desc in getattr(module, name.upper() + '_DESCRIPTORS', []): + if desc['available']: + if multiple: + extensions.setdefault(desc['format'], []).append(desc[name]) + else: + extensions[desc['format']] = desc[name] + setattr(this_module, big_name, extensions) + return extensions + + +def get_exporters(): + return get_extensions('exporter') + + +def get_collectors(): + return get_extensions('collector') + + +verbose_choices = ['fatal', 'error', 'warning', 'info'] + + +def parse_args(args): + import argparse + parser = argparse.ArgumentParser(epilog="After this command line add ! followed by command line of your program") + format_choices = list(get_exporters().keys()) + if sys.platform == 'win32': + format_choices.append("etw") + elif sys.platform == 'darwin': + format_choices.append("xcode") + elif sys.platform == 'linux': + format_choices.append("kernelshark") + parser.add_argument("-f", "--format", choices=format_choices, nargs='*', default=[], help='One or many output formats.') + parser.add_argument("-o", "--output", help='Output folder pattern - will be added to it') + parser.add_argument("-b", "--bindir", help='If you run script not from its location') + parser.add_argument("-i", "--input", help='Provide input folder for transformation (-)') + parser.add_argument("-t", "--trace", nargs='*', help='Additional trace file in one of supported formats') + parser.add_argument("-d", "--dir", help='Working directory for target (your program)') + parser.add_argument("-v", "--verbose", default="warning", choices=verbose_choices) + parser.add_argument("-c", "--cuts", nargs='*', help='Set "all" to merge all cuts in one trace') + parser.add_argument("-r", "--ring", type=int, const='5', default=None, action='store', nargs='?', help='Makes trace to cycle inside ring buffer of given length in seconds') + parser.add_argument("--target", help='Pid of target') + parser.add_argument("--stacks", action="store_true", help='Collect stacks') + parser.add_argument("--profile", action="store_true", help='Internal: profile runtool execution') + parser.add_argument("--collector", choices=list(get_collectors().keys()) + ['default']) + + separators = ['!', '?', '%'] + separator = None + for sep in separators: + if sep in args: + separator = args.index(sep) + break + # separator = args.index("!") if "!" in args else args.index("?") if "?" 
in args else None + if separator is not None: + parsed_args = parser.parse_args(args[:separator]) + if parsed_args.input: + parser.print_help() + print("Error: Input argument (-i) contradicts launch mode") + sys.exit(-1) + victim = args[separator + 1:] + victim[-1] = victim[-1].strip() # removal of trailing '\r' - when launched from .sh + if not parsed_args.output: + if sys.platform != 'win32': + parsed_args.output = '/tmp/isea_collection' + print('Collection will be written into:' , parsed_args.output) + else: + parser.print_help() + print("Error: No output (-o) given in launch mode") + sys.exit(-1) + handle_args(parsed_args) + return parsed_args, victim + else: # nothing to launch, transformation mode + if args: + args[-1] = args[-1].strip() # removal of trailing '\r' - when launched from .sh + parsed_args = parser.parse_args(args) + handle_args(parsed_args) + if not parsed_args.input: + if sys.platform != 'win32': + parsed_args.input = '/tmp/isea_collection' + if os.path.exists(parsed_args.input): + print('Collection will be read from:', parsed_args.input) + else: + parser.print_help() + sys.exit(-1) + else: + print("--input argument is required for transformation mode.") + parser.print_help() + sys.exit(-1) + if not parsed_args.format: + parsed_args.format = ['gt'] + setattr(parsed_args, 'user_input', parsed_args.input) + if not parsed_args.output: + parsed_args.output = parsed_args.input + return parsed_args, None + + +def handle_args(args): + if args.input: + args.input = subst_env_vars(args.input) + if args.output: + args.output = subst_env_vars(args.output) + if args.dir: + args.dir = subst_env_vars(args.dir) + if not args.bindir: + args.bindir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../bin') + args.bindir = os.path.abspath(args.bindir) + + +def get_args(): + return global_storage('arguments') + + +def get_original_env(): + return global_storage('environ') + + +def verbose_level(level=None, statics={}): + if not statics: + args = get_args() + if not args: + return verbose_choices.index(level) if level else 'warning' + statics['level'] = verbose_choices.index(get_args().verbose) + return verbose_choices.index(level) if level else statics['level'] + + +def message(level, txt, statics={}): + assert isinstance(statics, dict) + if level and verbose_level(level) > verbose_level(): # see default in "parse_args" + return False + + # in python2 type(parent_frame) is tuple with length == 4 + # in python3 type(parent_frame) is FrameSummary + parent_frame = traceback.extract_stack()[-2] + + # slice operation returns tuple + history = statics.setdefault(parent_frame[:4], {'count': 0, 'heap': []}) + + history['count'] += 1 + if history['count'] < 5 or not level: + print('\n', (level.upper() + ':') if level else '', '%s' % txt) + print('\tFile "%s", line %d, in %s' % parent_frame[:3]) + Collector.log("\n%s:\t%s\n" % (level.upper() if level else 'RUNTIME', txt), stack=(verbose_level(level) <= verbose_level('warning'))) + elif history['count'] == 5: + print('\n', level.upper(), 'Stopping pollution from', parent_frame[:3]) + return True + + +def main(): + reset_global('environ', os.environ.copy()) + (args, victim) = parse_args(sys.argv[1:]) # skipping the script name + reset_global('arguments', args) + + if victim: + ensure_dir(args.output, clean=True) + launch(args, victim) + else: + ext = os.path.splitext(args.input)[1] if not os.path.isdir(args.input) else None + transform_all(args) + Collector.log('Started with arguments: %s' % str(sys.argv)) + save_domains() + + +def 
os_lib_ext(): + if sys.platform == 'win32': + return '.dll' + elif sys.platform == 'darwin': + return '.dylib' + elif 'linux' in sys.platform: + return '.so' + assert (not "Unsupported platform") + + +def get_pids(victim, tracer): + assert len(victim) == 1 # one wildcard is supported yet + assert sys.platform != 'win32' # no Windows support yet + out, err = tracer.execute('ps -o pid,ppid,command -ax', log=False) + if err: + tracer.log(err) + return [] + + parsed = {} + for line in out.split('\n'): + if not line: + continue + parts = line.split() + if len(parts) < 3: + continue + cmd = ' '.join(parts[2:]) + if fnmatch.fnmatch(cmd.lower(), victim[0].lower()) and __file__ not in cmd: # get matching cmd + parsed[parts[0]] = cmd + print("Matching cmd:\t", parts[0], cmd) + return set(parsed.keys()) + + +def launch(args, victim): + sea.prepare_environ(args) + sea_itf = sea.ITT('tools') + + global_storage('collection').setdefault('time', {'start': time.time(), 'itt_start': sea_itf.get_timestamp()}) + + env = {} + paths = [] + macosx = sys.platform == 'darwin' + win32 = sys.platform == 'win32' + bits_array = [''] if macosx else ['32', '64'] + for bits in bits_array: + search = os.path.sep.join([args.bindir, "*IntelSEAPI" + os_lib_ext()]) + files = glob(search) + if not len(files): + message('warning', "didn't find any files for: %s" % search) + continue + paths.append((bits, files[0])) + if not len(paths): + print("Error: didn't find any *IntelSEAPI%s files. Please check that you run from bin directory, or use --bindir." % os_lib_ext()) + sys.exit(-1) + if macosx: + env["DYLD_INSERT_LIBRARIES"] = paths[0][1] + else: + paths = dict(paths) + if '32' in paths: + env["INTEL_LIBITTNOTIFY32"] = paths['32'] + env["INTEL_JIT_PROFILER32"] = paths['32'] + if '64' in paths: + env["INTEL_LIBITTNOTIFY64"] = paths['64'] + env["INTEL_JIT_PROFILER64"] = paths['64'] + + env["INTEL_SEA_FEATURES"] = os.environ['INTEL_SEA_FEATURES'] if 'INTEL_SEA_FEATURES' in os.environ else "" + env["INTEL_SEA_FEATURES"] += (" " + str(args.format)) if args.format else "" + env["INTEL_SEA_FEATURES"] += " stacks" if args.stacks else "" + + if args.verbose == 'info': + env['INTEL_SEA_VERBOSE'] = '1' + + if args.ring: + env["INTEL_SEA_RING"] = str(args.ring) + + if args.output: + env["INTEL_SEA_SAVE_TO"] = os.path.join(args.output, 'pid') + + # vulkan support + os_name = 'WIN' if win32 else 'OSX' if macosx else 'LIN' + var_name = os.pathsep.join(['VK_LAYER_INTEL_SEA_%s%s' % (os_name, bits) for bits in bits_array]) + + env['VK_INSTANCE_LAYERS'] = (os.environ['VK_INSTANCE_LAYERS'] + os.pathsep + var_name) if 'VK_INSTANCE_LAYERS' in os.environ else var_name + env['VK_LAYER_PATH'] = (os.environ['VK_LAYER_PATH'] + os.pathsep + args.bindir) if 'VK_LAYER_PATH' in os.environ else args.bindir + + message('info', "Running: " + str(victim)) + message('info', "Environment: " + str(env)) + + environ = global_storage('sea_env') + for key, val in env.items(): + if key in environ and val != environ[key]: + assert key in ['LD_PRELOAD', 'DYLD_INSERT_LIBRARIES'] + environ[key] += ':' + val + else: + environ[key] = val + + if 'kernelshark' in args.format: + victim = 'trace-cmd record -e IntelSEAPI/* ' + victim + + tracer = None + + if args.collector: + tracer = get_collectors()[args.collector] + elif not tracer: # using default collector per system + if 'linux' in sys.platform: + tracer = get_collectors()['ftrace'] + elif 'win32' == sys.platform: + tracer = get_collectors()['etw'] + elif 'darwin' in sys.platform: + tracer = get_collectors()['dtrace'] + 
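+    # Note: on non-Windows targets launched with '!' the victim is paused (SIGSTOP) until the
+    # collector has started, unless a ring buffer is requested; see the SIGSTOP/SIGCONT calls below.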
run_suspended = False + + if args.dir: + full_victim = os.path.join(args.dir, victim[0]) + if os.path.exists(full_victim): + victim[0] = full_victim + + setattr(args, 'victim', victim[0]) + + tracer = tracer(args) if tracer else None # turning class into instance + if '!' in sys.argv[1:]: + assert tracer + + if hasattr(tracer, 'launch_victim'): + victim[0] = victim[0].replace(' ', r'\ ') + proc = tracer.launch_victim(victim, env=environ) + else: + if run_suspended: # might consider using preload of SEA lib and do the suspend there. Or allow tracers to run it. + suspended = '(cd "%s"; kill -STOP $$; exec %s )' % (args.dir or '.', ' '.join(victim)) + proc = subprocess.Popen(suspended, env=environ, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + else: + proc = subprocess.Popen(victim, env=environ, shell=False, cwd=args.dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if sys.platform != 'win32' and not run_suspended: # FIXME: implement suspended start on Windows! + if not args.ring: + proc.send_signal(signal.SIGSTOP) + + args.target = proc.pid + + tracer.start() + + if sys.platform != 'win32': # collector start may be long, so we freeze victim during this time + print("PID:", proc.pid) + if not args.ring: + proc.send_signal(signal.SIGCONT) + + + print("Waiting application to exit...") + global_storage('collection')['time']['before'] = time.time() + + try: + proc.wait() + except KeyboardInterrupt: + print("Stopping all...") + proc.send_signal(signal.SIGABRT) + out, err = proc.communicate() + if out or err: + print("\n\n -= Target output =- {\n") + print(out.decode().strip()) + print("\n", "-" * 50, "\n") + print(err.decode().strip()) + print("\n}\n\n") + elif '?' in sys.argv[1:]: + print("Attach to:", victim) + pids = get_pids(victim, tracer) + if not pids: + print("Error: nothing found...") + return + if tracer: + args.target = list(pids) + tracer.start() + + print("Waiting for CTRL+C...") + global_storage('collection')['time']['before'] = time.time() + + def is_running(pid): + try: + os.kill(int(pid), 0) + return True + except OSError: + return False + + try: + while any(is_running(pid) for pid in pids): + time.sleep(0.5) + except KeyboardInterrupt: + pass + else: + message('error', 'unsupported separator') + return -1 + + global_storage('collection')['time']['after'] = time.time() + print("Stopping collectors...") + if tracer: + args.trace = tracer.stop() + + if not args.output: + return [] + + args.input = args.output + + times = global_storage('collection')['time'] + times['end'] = time.time() + times['itt_end'] = sea_itf.get_timestamp() + + if args.target: + if isinstance(args.target, list): + allowed_pids = args.target + else: + allowed_pids = [args.target] + global_storage('collection').setdefault('targets', allowed_pids) + + if args.format: + transform_all(args) + + +def subst_env_vars(path): + return os.path.expandvars(path) if sys.platform == 'win32' else os.path.expanduser(path) + + +UserProfile = subst_env_vars('%USERPROFILE%' if sys.platform == 'win32' else '~') +PermanentCache = os.path.join(UserProfile, '.isea_cache.dict') + + +def ensure_dir(path, clean, statics={}): + if path in statics: + assert(statics[path] or not clean) + return + statics[path] = clean + if os.path.exists(path): + if clean: + shutil.rmtree(path) + else: + return + os.makedirs(path) + + +def transform_all(args): + setattr(args, 'user_input', args.input) + path = os.path.join(args.user_input, 'transform') + ensure_dir(path, True) + + output = [] + saved_output = args.output + 
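+    # Each per-process capture folder (pid-*) is transformed separately; its results are written
+    # next to the requested output with the folder name appended as a suffix.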
sea_folders = [folder for folder in glob(os.path.join(args.input, 'pid-*')) if os.path.isdir(folder)] + if sea_folders: + for folder in sea_folders: + args.input = folder + args.output = saved_output + '.' + os.path.basename(folder) + output += transform(args) + + args.output = saved_output + + replacement = ('/', '\\') if sys.platform == 'win32' else ('\\', '/') + for path in output: + print('Result:', os.path.abspath(path).replace(*replacement), format_bytes(os.path.getsize(path))) + + return output + + +def split_filename(path): + (dir, name) = os.path.split(path) + (name, ext) = os.path.splitext(name) + ring = None + cut = None + if '-' in name: + (name, ring) = name.split("-") + if '!' in name: + (name, cut) = name.split("!") + return {'dir': dir, 'name': name, 'cut': cut, 'ring':ring, 'ext': ext} + + +def default_tree(args): + tree = {"strings": {}, "domains": {}, "threads": {}, "groups": {}, "modules": {}, "ring_buffer": False, "cuts": set()} + if os.path.isdir(args.input): + for filename in glob(os.path.join(args.input, '*.mdl')): + with open(filename, 'r') as file: + parts = file.readline().split() + tree["modules"][int(os.path.basename(filename).replace(".mdl", ""))] = [' '.join(parts[0:-1]), parts[-1]] + return tree + + +def build_tid_map(args, path): + tid_map = {} + + def parse_process(src): + if not os.path.isdir(src): + return + pid = src.rsplit('-', 1)[1] + if not pid.isdigit(): + return + pid = int(pid) + for folder in glob(os.path.join(src, '*', '*.sea')): + tid = int(os.path.basename(folder).split('!')[0].split('-')[0].split('.')[0]) + tid_map[tid] = pid + if pid not in tid_map: + tid_map[pid] = pid + + for folder in glob(os.path.join(path, '*-*')): + parse_process(folder) + return tid_map + + +def sea_reader(args): # reads the structure of .sea format folder into dictionary + folder = args.input + if not os.path.exists(folder): + print("""Error: folder "%s" doesn't exist""" % folder) + tree = default_tree(args) + pos = folder.rfind("-") # pid of the process is encoded right in the name of the folder + tree["pid"] = int(folder[pos + 1:]) + folder = folder.replace("\\", "/").rstrip("/") + toplevel = next(os.walk(folder)) + for filename in toplevel[2]: + with open("/".join([folder, filename]), "r") as file: + if filename.endswith(".str"): # each string_handle_create writes separate file, name is the handle, content is the value + tree["strings"][int(filename.replace(".str", ""))] = file.readline() + elif filename.endswith(".tid"): # named thread makes record: name is the handle and content is the value + tree["threads"][filename.replace(".tid", "")] = file.readline() + elif filename.endswith(".pid"): # named groups (pseudo pids) makes record: group is the handle and content is the value + tree["groups"][filename.replace(".pid", "")] = file.readline() + for domain in toplevel[1]: # data from every domain gets recorded into separate folder which is named after the domain name + tree["domains"][domain] = {"files": []} + for file in next(os.walk("/".join([folder, domain])))[2]: # each thread of this domain has separate file with data + if not file.endswith(".sea"): + print("Warning: weird file found:", file) + continue + filename = file[:-4] + + tree["ring_buffer"] = tree["ring_buffer"] or ('-' in filename) + tid = int(filename.split("!")[0].split("-")[0]) + tree["cuts"].add(split_filename(filename)['cut']) + + tree["domains"][domain]["files"].append((tid, "/".join([folder, domain, file]))) + + def time_sort(item): + with open(item[1], "rb") as file: + tuple = 
read_chunk_header(file) + return tuple[0] + + tree["domains"][domain]["files"].sort(key=time_sort) + return tree + + +g_progress_interceptor = None +verbose_progress = True + +# FIXME: doesn't belong this file, move to 'utils' + + +class Progress: + def __init__(self, total, steps, message=""): + self.total = total + self.steps = steps + self.shown_steps = -1 + self.message = message + self.last_tick = None + + def __enter__(self): + return self + + def time_to_tick(self, interval=1): + return (datetime.now() - self.last_tick).total_seconds() > interval if self.last_tick else True + + def tick(self, current): + self.last_tick = datetime.now() + if g_progress_interceptor: + g_progress_interceptor(self.message, current, self.total) + if self.total: + self.show_progress(int(self.steps * current / self.total), float(current) / self.total) + + def show_progress(self, show_steps, percentage): + if self.shown_steps < show_steps: + if verbose_progress: + print('\r%s: %d%%' % (self.message, int(100*percentage)), end='') + sys.stdout.flush() + self.shown_steps = show_steps + + def __exit__(self, type, value, traceback): + if g_progress_interceptor: + g_progress_interceptor(self.message, self.total, self.total) + self.show_progress(self.steps, 1) + if verbose_progress: + print('\r%s: %d%%\n' % (self.message, 100)) + return False + + @staticmethod + def set_interceptor(interceptor, verbose_mode=False): + global g_progress_interceptor + global verbose_progress + g_progress_interceptor = interceptor + verbose_progress = verbose_mode + + +class PseudoProgress(Progress): + + def profiler(self, frame, event, arg): + if 'return' not in event: + return + cur_time = time.time() + if cur_time - self.time > 1: + self.time = cur_time + self.tick(cur_time) + + def __init__(self, message=""): + self.time = None + Progress.__init__(self, 0, 0, message) + self.old_profiler = sys.getprofile() + + def __enter__(self): + self.time = time.time() + sys.setprofile(self.profiler) + return self + + def __exit__(self, type, value, traceback): + sys.setprofile(self.old_profiler) + return Progress.__exit__(self, type, value, traceback) + + +def read_chunk_header(file): + chunk = file.read(10) # header of the record, see STinyRecord in Recorder.cpp + if not chunk: + return 0, 0, 0 + return struct.unpack('Qbb', chunk) + + +def transform(args): + message('info', "Transform: " + str(args)) + tree = sea_reader(args) # parse the structure + if args.cuts and args.cuts == ['all'] or not args.cuts: + return transform2(args, tree) + else: + result = [] + output = args.output[:] # deep copy + for current_cut in tree['cuts']: + if args.cuts and current_cut not in args.cuts: + continue + args.output = (output + "!" + current_cut) if current_cut else output + print("Cut #", current_cut if current_cut else "") + + def skip_fn(path): + filename = os.path.split(path)[1] + if current_cut: # read only those having this cut name in filename + if current_cut != split_filename(filename)['cut']: + return True + else: # reading those having not cut name in filename + if "!" 
in filename: + return True + return False + + result += transform2(args, tree, skip_fn) + args.output = output + return result + + +# FIXME: doesn't belong this file, move to Combiners or something + +TaskTypes = [ + "task_begin", "task_end", + "task_begin_overlapped", "task_end_overlapped", + "metadata_add", + "marker", + "counter", + "frame_begin", "frame_end", + "object_new", "object_snapshot", "object_delete", + "relation" +] + + +class TaskCombinerCommon: + def __init__(self, args, tree): + self.no_begin = [] # for the ring buffer case when we get task end but no task begin + self.time_bounds = [2 ** 64, 0] # left and right time bounds + self.tree = tree + self.args = args + self.domains = {} + self.prev_sample = 0 + self.total_memory = 0 + self.prev_memory = None + self.memcounters = {} + + def finish(self): + pass + + def __call__(self, fn, data): + domain = self.domains.setdefault(data['domain'], {'tasks': {}, 'counters': {}}) + thread = domain['tasks'].setdefault(data['tid'], {'byid': {}, 'stack': [], 'args': {}}) + + def get_tasks(id): + if not id: + return thread['stack'] + return thread['byid'].setdefault(id, []) + + def get_task(id): + if id: + tasks = get_tasks(id) + if not tasks: # they can be stacked + tasks = get_tasks(None) + if not tasks or ('id' not in tasks[-1]) or tasks[-1]['id'] != id: + return None + else: + tasks = get_tasks(None) + if tasks: + return tasks[-1] + else: + return None + + def find_task(id): + for thread_stacks in domain['tasks'].values(): # look in all threads + if (id in thread_stacks['byid']) and thread_stacks['byid'][id]: + return thread_stacks['byid'][id][-1] + else: + for item in thread_stacks['stack']: + if ('id' in item) and item['id'] == id: + return item + + def get_stack(tid): + stack = [] + for domain in self.domains.values(): + if tid not in domain['tasks']: + continue + thread = domain['tasks'][tid] + for byid in thread['byid'].values(): + stack += byid + if thread['stack']: + stack += thread['stack'] + stack.sort(key=lambda item: item['time']) + return stack + + def get_last_index(tasks, type): + if not len(tasks): + return None + index = len(tasks) - 1 + while index > -1 and tasks[index]['type'] != type: + index -= 1 + if index > -1: + return index + return None + + if fn == "task_begin" or fn == "task_begin_overlapped": + if not (('str' in data) or ('pointer' in data)): + data['str'] = 'Unknown' + self.time_bounds[0] = min(self.time_bounds[0], data['time']) + if 'delta' in data and data['delta']: # turbo mode, only begins are written + end = data.copy() + end['time'] = data['time'] + int(data['delta']) + self.time_bounds[1] = max(self.time_bounds[1], end['time']) + self.complete_task('task', data, end) # for now arguments are not supported in turbo tasks. Once argument is passed, task gets converted to normal. 
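+                # Normal (non-turbo) begin: remember the record and wait for the matching
+                # task_end, which pops it and emits the completed task.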
+ else: + get_tasks(None if fn == "task_begin" else data['id']).append(data) + elif fn == "task_end" or fn == "task_end_overlapped": + self.time_bounds[1] = max(self.time_bounds[1], data['time']) + tasks = get_tasks(None if fn == "task_end" else data['id']) + index = get_last_index(tasks, data['type'] - 1) + if index is not None: + item = tasks.pop(index) + if self.task_postprocessor: + self.task_postprocessor.postprocess('task', item, data) + if not self.handle_special('task', item, data): + if data['time'] > item['time']: + self.complete_task('task', item, data) + else: + message('warning', 'Negative length task: %s => %s' % (str(item), str(data))) + else: + assert (self.tree["ring_buffer"] or self.tree['cuts']) + if 'str' in data: # nothing to show without name + self.no_begin.append(data) + elif fn == "frame_begin": + get_tasks(data['id'] if 'id' in data else None).append(data) + elif fn == "frame_end": + frames = get_tasks(data['id'] if 'id' in data else None) + index = get_last_index(frames, 7) + if index is not None: + item = frames.pop(index) + self.complete_task("frame", item, data) + else: + assert (self.tree["ring_buffer"] or self.tree['cuts']) + elif fn == "metadata_add": + if 'id' in data: + task = get_task(data['id']) + if task: + args = task.setdefault('args', {}) + else: + args = thread['args'].setdefault(data['id'], {}) + + args[data['str']] = data['delta'] if 'delta' in data else '0x0' + else: # global metadata + if not self.handle_special('meta', data, None): + self.global_metadata(data) + elif fn == "object_snapshot": + if 'args' in data: + args = data['args'].copy() + else: + args = {'snapshot': {}} + if 'data' in data: + state = data['data'] + for pair in state.split(","): + (key, value) = tuple(pair.split("=")) + args['snapshot'][key] = value + data['args'] = args + self.complete_task(fn, data, data) + elif fn in ["marker", "counter", "object_new", "object_delete"]: + if fn == "marker" and data['data'] == 'task': + markers = get_tasks("marker_" + (data['id'] if 'id' in data else "")) + if markers: + item = markers.pop() + item['type'] = 7 # frame_begin + item['domain'] += ".continuous_markers" + item['time'] += 1 + self.complete_task("frame", item, data) + markers.append(data) + else: + if ('id' in data) and (data['id'] in thread['args']): + data['args'] = thread['args'][data['id']] + del thread['args'][data['id']] + self.complete_task(fn, data, data) + elif fn == "relation": + self.relation( + data, + get_task(data['id'] if 'id' in data else None), + get_task(data['parent']) or find_task(data['parent']) + ) + else: + assert (not "Unsupported type:" + fn) + + def compress_counter(self, cache, data): + values = cache['values'] + if values and not data: + length = len(values) + avg_value = sum([value['delta'] for value in values]) / length + if cache['last'] != avg_value: + avg_time = int(sum([value['time'] for value in values]) / length) + self.process(values[0]['pid']).thread(values[0]['tid']).counter(values[0]['str']).set_value(avg_time, avg_value) + cache['last'] = avg_value + cache['values'] = [] + + def handle_special(self, kind, begin, end): + if self.sea_decoders: + for decoder in self.sea_decoders: + if decoder.handle_special(kind, begin, end): + return True + return False + + def flush_counters(self, domain, data): + for name, counter in domain['counters'].items(): + common_data = data.copy() + common_data['time'] = counter['begin'] + (counter['end'] - counter['begin']) / 2 + common_data['str'] = name + common_data['delta'] = sum(counter['values']) / 
len(counter['values']) + self.complete_task('counter', common_data, common_data) + + def flush_compressed_counters(self): + for pid, threads in self.memcounters.items(): + for tid, counters in threads.items(): + for name, counter in counters.items(): + self.compress_counter(counter, None) + + +def default_event_filer(cls, type, begin, end): + if begin['domain'] == 'Metal': + if 'FailureType' in begin['str']: + return None, None, None + return type, begin, end + + +class Callbacks(TaskCombinerCommon): + event_filter = default_event_filer + task_postprocessor = None + + def __init__(self, args, tree): + TaskCombinerCommon.__init__(self, args, tree) + self.callbacks = [] # while parsing we might have one to many 'listeners' - output format writers + self.stack_sniffers = [] # only stack listeners + self.allowed_pids = set() + self.processes = {} + self.tasks_from_samples = {} + self.on_finalize_callbacks = [] + + collection = global_storage('collection') + if 'targets' in collection: + self.allowed_pids = set(collection['targets']) + else: + self.allowed_pids = set() + self.tid_map = self.get_globals()['tid_map'] + if hasattr(self.args, 'user_input') and os.path.isdir(self.args.user_input): + tid_map = build_tid_map(self.args, self.args.user_input) + self.tid_map.update(tid_map) + self.allowed_pids |= set(tid_map.values()) + + for fmt in args.format: + self.callbacks.append(get_exporters()[fmt](args, tree)) + + if args.target: + if isinstance(args.target, list): + self.allowed_pids += args.target + else: + self.allowed_pids.add(int(args.target)) + + self.sea_decoders = [] + + self.globals = self.get_globals() + self.cpus = set() + self.all_cpus_started = os.path.isfile(self.args.user_input) or None + self.proc_names = {} + + @classmethod + def get_globals(cls): + return global_storage('Callbacks', { + 'starts': {}, 'ends': {}, 'dtrace': {'finished': False}, 'tid_map': {} + }) + + def add_stack_sniffer(self, sniffer): + self.stack_sniffers.append(sniffer) + + @classmethod + def set_event_filter(cls, filter): + prev = cls.event_filter + cls.event_filter = filter + return prev + + @classmethod + def set_task_postprocessor(cls, postprocessor): + cls.task_postprocessor = postprocessor + + def on_finalize(self, function): # will be called with callbacks(self) as the only argument + self.on_finalize_callbacks.append(function) + + def is_empty(self): + return 0 == len(self.callbacks) + + def __enter__(self): + [callback.__enter__() for callback in self.callbacks] + return self + + def __exit__(self, type, value, traceback): + self.finalize() + [callback.__exit__(type, value, traceback) for callback in self.callbacks] # emulating 'with' statement + return False + + def finalize(self): + for decoder in self.sea_decoders: + decoder.finalize() + for kind, data in self.tasks_from_samples.items(): + for pid, threads in data.items(): + for tid, tasks in threads.items(): + self.handle_stack(pid, tid, tasks.last_stack_time + TIME_SHIFT_FOR_GT * len(tasks) + 1, [], kind) + for function in self.on_finalize_callbacks: + function(self) + + if self.allowed_pids: + global_storage('collection').setdefault('targets', self.allowed_pids) + + self.finish() + + def on_event(self, type, data): + if self.event_filter: + type, data, end = self.event_filter(type, data, None) + if not type: + return False + + if not is_domain_enabled(data['domain']): + return False + + if data.get('internal_name', None) and not is_domain_enabled('%s.%s' % (data['domain'], data['internal_name'])): + return False + + self.__call__(type, data) + 
return True + + def complete_task(self, type, begin, end): + if self.event_filter: + type, begin, end = self.event_filter(type, begin, end) + if not type: + return False + if self.handle_special(type, begin, end): # returns True if event is consumed and doesn't require processing + return True + + if not is_domain_enabled(begin['domain']): + return False + + if end: + # copy here as handler can change the data for own good - this shall not affect other handlers + [callback.complete_task(type, begin.copy(), end.copy() if end else end) for callback in self.callbacks] + return True + else: + return False + + def global_metadata(self, data): + [callback.global_metadata(data.copy()) for callback in self.callbacks] + + def relation(self, data, head, tail): + for callback in self.callbacks: + callback.relation(data, head, tail) + + def get_result(self): + res = [] + for callback in self.callbacks: + res += callback.get_targets() + return res + + def check_time_in_cs_bounds(self, timestamp, statics={}): + if not statics: + globals = self.get_globals() + if not globals['dtrace']['finished'] or 'context_switch' not in self.globals['ends']: + return None + statics['start'] = globals['starts']['context_switch'] + statics['end'] = globals['ends']['context_switch'] + + return statics['start'] <= timestamp <= statics['end'] + + def get_pid(self, tid): + if tid in self.tid_map: + return self.tid_map[tid] + return None + + class Process: + def __init__(self, callbacks, pid, name): + self.callbacks = callbacks + self.pid = int(pid) + self.threads = {} + if name: + self.set_name(name) + + def set_name(self, name): + self.callbacks.set_process_name(self.pid, name) + + class Thread: + def __init__(self, process, tid, name): + self.process = process + self.tid = int(tid) + tid_map = self.process.callbacks.tid_map + if process.pid > 0 and self.tid > 0: + if self.tid not in tid_map: + tid_map[self.tid] = process.pid + elif tid_map[self.tid] != process.pid: + message('error', 'TID %d was part of PID %d and now PID %d... How come?' 
% (self.tid, tid_map[self.tid], process.pid)) + self.overlapped = {} + self.to_overlap = {} + self.task_stack = [] + self.task_pool = {} + self.snapshots = {} + self.lanes = {} + if name: + self.set_name(name) + self.process.callbacks.on_finalize(self.finalize) + + def auto_break_overlapped(self, call_data, begin): + id = call_data['id'] + if begin: + call_data['realtime'] = call_data['time'] # as we gonna change 'time' + call_data['lost'] = 0 + self.overlapped[id] = call_data + else: + if id in self.overlapped: + real_time = self.overlapped[id]['realtime'] + to_remove = [] + del self.overlapped[id] # the task has ended, removing it from the pipeline + time_shift = 0 + for begin_data in sorted(self.overlapped.values(), key=lambda data: data['realtime']): # finish all and start again to form melting task queue + time_shift += 1 # making sure the order of tasks on timeline, probably has to be done in Chrome code rather + end_data = begin_data.copy() # the end of previous part of task is also here + end_data['time'] = call_data['time'] - time_shift # new begin for every task is here + end_data['type'] = call_data['type'] + self.process.callbacks.on_event('task_end_overlapped', end_data) # finish it + if begin_data['realtime'] < real_time: + begin_data['lost'] += 1 + if begin_data['lost'] > 10: # we seem lost the end ETW call + to_remove.append(begin_data['id']) # main candidate is the event that started earlier but nor finished when finished the one started later + else: + begin_data['time'] = call_data['time'] + time_shift # new begin for every task is here + self.process.callbacks.on_event('task_begin_overlapped', begin_data) # and start again + for id in to_remove: # FIXME: but it's better somehow to detect never ending tasks and not show them at all or mark somehow + if id in self.overlapped: + del self.overlapped[id] # the task end was probably lost + else: + message('error', '???') + + def process_overlapped(self, threshold=100): + if not threshold or 0 != (len(self.to_overlap) % threshold): + return + keys = sorted(self.to_overlap)[0:threshold//2] + to_del = set() + for key in keys: + task = self.to_overlap[key] + if task.overlap_begin: + self.auto_break_overlapped(task.data, True) + self.process.callbacks.on_event("task_begin_overlapped", task.data) + task.overlap_begin = False + else: + end_data = task.data.copy() + end_data['time'] = key + end_data['type'] += 1 + self.auto_break_overlapped(end_data, False) + self.process.callbacks.on_event("task_end_overlapped", end_data) + to_del.add(key) + for key in to_del: + del self.to_overlap[key] + + def finalize(self, _): + self.process_overlapped(0) + + def set_name(self, name): + self.process.callbacks.set_thread_name(self.process.pid, self.tid, name) + + class EventBase: + def __init__(self, thread, name, domain, internal_name=None): + self.thread = thread + self.name = name + self.domain = domain + self.internal_name = internal_name + + class Counter(EventBase): + def __init__(self, *args): + Callbacks.Process.Thread.EventBase.__init__(self, *args) + + def set_value(self, time_stamp, value): + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'delta': value, 'type': 6, + 'internal_name': self.internal_name + } + self.thread.process.callbacks.on_event('counter', data) + + def set_multi_value(self, time_stamp, values_dict): # values_dict is name:value dictionary + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 
'str': self.name, + 'time': time_stamp, 'args': values_dict, 'type': 6 + } + self.thread.process.callbacks.on_event('counter', data) + + def counter(self, name, domain='sea', internal_name=None): + return Callbacks.Process.Thread.Counter(self, name, domain, internal_name) + + class Marker(EventBase): + def __init__(self, thread, scope, name, domain): + Callbacks.Process.Thread.EventBase.__init__(self, thread, name, domain) + self.scope = scope + + def set(self, time_stamp, args=None): + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'type': 5, 'data': self.scope + } + if args is not None: + data.update({'args': args}) + + return self.thread.process.callbacks.on_event('marker', data) + + def marker(self, scope, name, domain='sea'): # scope is one of 'task', 'global', 'process', 'thread' + scopes = {'task': 'task', 'global': 'global', 'process': 'track_group', 'thread': 'track'} + return Callbacks.Process.Thread.Marker(self, scopes[scope], name, domain) + + class TaskBase(EventBase): + def __init__(self, type_id, type_name, thread, name, domain): + Callbacks.Process.Thread.EventBase.__init__(self, thread, name, domain) + self.data = None + self.args = {} + self.meta = {} + # These must be set in descendants! + self.event_type = type_id # first of types + self.event_name = type_name + self.overlap_begin = True + + def __begin(self, time_stamp, task_id, args, meta): + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'str': self.name, 'type': self.event_type + } + if task_id is not None: + data.update({'id': task_id}) + if args: + data.update({'args': args}) + if meta: + data.update(meta) + return data + + def begin(self, time_stamp, task_id=None, args={}, meta={}): + self.data = self.__begin(time_stamp, task_id, args, meta) + + if self.event_type == 2: # overlapped task + self.thread.auto_break_overlapped(self.data, True) + self.thread.process.callbacks.on_event("task_begin_overlapped", self.data) + return self + + def add_args(self, args): # dictionary is expected + self.args.update(args) + return self + + def add_meta(self, meta): # dictionary is expected + self.meta.update(meta) + return self + + def get_data(self): + return self.data + + def get_args(self): + args = self.data['args'].copy() + args.update(self.args) + return args + + def end(self, time_stamp): + assert self.data # expected to be initialized in self.begin call + if time_stamp: + end_data = self.data.copy() + end_data.update({'time': time_stamp, 'type': self.event_type + 1}) + if self.args: + if 'args' in end_data: + end_data['args'].update(self.args) + else: + end_data['args'] = self.args + if self.meta: + end_data.update(self.meta) + else: + end_data = None # special case when end is unknown and has to be calculated by viewer + + if self.event_type == 2: # overlapped task + self.thread.auto_break_overlapped(end_data, False) + self.thread.process.callbacks.on_event("task_end_overlapped", end_data) + else: + self.thread.process.callbacks.complete_task(self.event_name, self.data, end_data) + self.data = None + self.args = {} + self.meta = {} + + def complete(self, start_time, duration, task_id=None, args={}, meta={}): + begin_data = self.__begin(start_time, task_id, args, meta) + end_data = begin_data.copy() + end_data['time'] = start_time + duration + end_data['type'] = self.event_type + 1 + self.thread.process.callbacks.complete_task(self.event_name, 
begin_data, end_data) + return begin_data + + def end_overlap(self, time_stamp): + while self.data['time'] in self.thread.to_overlap: + self.data['time'] += 1 + self.thread.to_overlap[self.data['time']] = self + while time_stamp in self.thread.to_overlap: + time_stamp -= 1 + self.thread.to_overlap[time_stamp] = self + self.data['id'] = time_stamp + self.data['type'] = self.event_type = 2 + self.thread.process_overlapped() + + class Task(TaskBase): + def __init__(self, thread, name, domain, overlapped): + Callbacks.Process.Thread.TaskBase.__init__( + self, + 2 if overlapped else 0, + 'task', + thread, + name, domain + ) + self.relation = None + self.related_begin = None + + def end(self, time_stamp): + begin_data = self.data.copy() # expected to be initialized in self.begin call + Callbacks.Process.Thread.TaskBase.end(self, time_stamp) + self.__check_relation(begin_data) + + def __check_relation(self, begin): + if not self.relation: + return + if self.related_begin: # it's the later task, let's emit the relation + self.__emit_relation(begin, self.related_begin) + self.related_begin = None + else: # we store our begin in the related task and it will emit the relation on its end + self.relation.related_begin = begin + self.relation = None + + def __emit_relation(self, left, right): + relation = (left.copy(), right.copy(), left) + if 'realtime' in relation[1]: + relation[1]['time'] = relation[1]['realtime'] + if 'realtime' in relation[2]: + relation[2]['time'] = relation[2]['realtime'] + relation[0]['parent'] = left['id'] if 'id' in left else id(left) + self.thread.process.callbacks.relation(*relation) + + def complete(self, start_time, duration, task_id=None, args={}, meta={}): + begin_data = Callbacks.Process.Thread.TaskBase.complete(self, start_time, duration, task_id, args, meta) + self.__check_relation(begin_data) + + def relate(self, task): # relation is being written when last of two related tasks was fully emitted + if self.relation != task: + self.relation = task + task.relate(self) + + def end_overlap(self, time_stamp): + Callbacks.Process.Thread.TaskBase.end_overlap(self, time_stamp) + if self.relation: + self.__emit_relation(self.data, self.relation.data) + + def task(self, name, domain='sea', overlapped=False): + return Callbacks.Process.Thread.Task(self, name, domain, overlapped) + + class Frame(TaskBase): + def __init__(self, thread, name, domain): + Callbacks.Process.Thread.TaskBase.__init__(self, 7, 'frame', thread, name, domain) + + def frame(self, name, domain='sea'): + return Callbacks.Process.Thread.Frame(self, name, domain) + + class Lane: + def __init__(self, thread, name, domain): + self.thread, self.domain = thread, domain + self.name = '%s (%d):' % (name, thread.tid) + self.first_frame = None + self.id = hex(hash(self)) + self.thread.process.callbacks.on_finalize(self.finalize) + + def finalize(self, _): + if self.first_frame: + Callbacks.Process.Thread\ + .TaskBase(7, 'frame', self.thread, self.name, self.domain) \ + .begin(self.first_frame - 1000, self.id).end(None) # the open-ended frame (automatically closed by viewer) + + def frame_begin(self, time_stamp, name, args={}, meta={}): + if not self.first_frame or time_stamp < self.first_frame: + self.first_frame = time_stamp + return Callbacks.Process.Thread.TaskBase(7, 'frame', self.thread, name, self.domain).begin(time_stamp, self.id, args, meta) + + def lane(self, name, domain='sea'): + if name not in self.lanes: + self.lanes[name] = Callbacks.Process.Thread.Lane(self, name, domain) + return self.lanes[name] + + 
class Object(EventBase): + def __init__(self, thread, id, name, domain): + Callbacks.Process.Thread.EventBase.__init__(self, thread, name, domain) + self.id = id + if not self.thread.snapshots: + self.thread.snapshots = {'last_time': 0} + + def create(self, time_stamp): + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'type': 9, 'id': self.id + } + self.thread.process.callbacks.on_event("object_new", data) + return self + + def snapshot(self, time_stamp, args): + if time_stamp is None or time_stamp <= self.thread.snapshots['last_time']: + time_stamp = self.thread.snapshots['last_time'] + 1 + self.thread.snapshots['last_time'] = time_stamp + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'type': 10, 'id': self.id, + 'args': {'snapshot': args} + } + self.thread.process.callbacks.on_event("object_snapshot", data) + return self + + @staticmethod # use to prepare argument for 'snapshot' call, only png in base64 string is supported by chrome + def create_screenshot_arg(png_base64): + return {'screenshot': png_base64} + + def destroy(self, time_stamp): + data = { + 'pid': self.thread.process.pid, 'tid': self.thread.tid, + 'domain': self.domain, 'str': self.name, + 'time': time_stamp, 'type': 11, 'id': self.id + } + self.thread.process.callbacks.on_event("object_delete", data) + + def object(self, id, name, domain='sea'): + return Callbacks.Process.Thread.Object(self, id, name, domain) + + def thread(self, tid, name=None): + if tid not in self.threads: + self.threads[tid] = Callbacks.Process.Thread(self, tid, name) + return self.threads[tid] + + def process(self, pid, name=None): + if pid not in self.processes: + self.processes[pid] = Callbacks.Process(self, pid, name) + return self.processes[pid] + + def vsync(self, time_stamp, args={}, statics={}): + if not statics: + statics['marker'] = self.process(-1).thread(-1, 'VSYNC').marker('thread', 'vblank', 'gpu') + args.update({'AbsTime': time_stamp}) + statics['marker'].set(time_stamp, args) + + def context_switch(self, time_stamp, cpu, prev_tid, next_tid, prev_name='', next_name='', prev_state='S', prev_prio=0, next_prio=0): + if cpu not in self.cpus: + self.cpus.add(cpu) + all_cpus_started = max(self.cpus) + 1 == len(self.cpus) + if self.all_cpus_started != all_cpus_started: + self.globals['starts']['context_switch'] = time_stamp + self.all_cpus_started = all_cpus_started + if not self.all_cpus_started: + return + self.globals['ends']['context_switch'] = time_stamp + for callback in self.callbacks: + callback.context_switch( + time_stamp, cpu, + { + 'tid': prev_tid, + 'name': prev_name.replace(' ', '_'), + 'state': prev_state, + 'prio': prev_prio, + }, + { + 'tid': next_tid, + 'prio': next_prio, + 'name': next_name.replace(' ', '_') + } + ) + + def wakeup(self, time_stamp, cpu, prev_pid, prev_tid, next_pid, next_tid, prev_name='', next_name='', sync_prim='', sync_prim_addr=None): + if prev_pid not in self.allowed_pids and next_pid not in self.allowed_pids: + return False + + args = {'target': next_tid, 'type': sync_prim, 'addr': sync_prim_addr} if sync_prim_addr else {} + args.update({'target': next_tid, 'by': prev_tid}) + event_width = 2000 + from_task = self.process(prev_pid).thread(prev_tid).task('wakes').begin(time_stamp - event_width, args=args) + to_task = self.process(next_pid).thread(next_tid).task('woken').begin(time_stamp, args=args) + from_task.relate(to_task) + 
from_task.end(time_stamp - event_width/2) + to_task.end(time_stamp + event_width/2) + + for callback in self.callbacks: + callback.wakeup( + time_stamp, cpu, + { + 'pid': prev_pid, + 'tid': prev_tid, + 'name': prev_name.replace(' ', '_') + }, + { + 'pid': next_pid, + 'tid': next_tid, + 'name': next_name.replace(' ', '_') + } + ) + + def get_process_name(self, pid): + return self.proc_names[pid] if pid in self.proc_names else None + + def set_process_name(self, pid, name, labels=[]): + order = -1 if pid in self.allowed_pids else pid + if pid not in self.proc_names: + self.proc_names[pid] = [name] + self.__call__("metadata_add", {'domain': 'IntelSEAPI', 'str': '__process__', 'pid': pid, 'tid': -1, 'delta': order, 'data': name, 'labels': labels}) + elif name not in self.proc_names[pid]: + self.proc_names[pid].append(name) + full_name = '->'.join(self.proc_names[pid]) + self.__call__("metadata_add", {'domain': 'IntelSEAPI', 'str': '__process__', 'pid': pid, 'tid': -1, 'delta': order, 'data': full_name, 'labels': labels}) + message('warning', 'Pid %d name changed: %s' % (pid, full_name)) + + def set_thread_name(self, pid, tid, name): + self.__call__("metadata_add", {'domain': 'IntelSEAPI', 'str': '__thread__', 'pid': pid, 'tid': tid, 'data': '%s (%d)' % (name, tid), 'delta': tid}) + + def add_metadata(self, name, data): + self.__call__("metadata_add", {'domain': 'IntelSEAPI', 'data': data, 'str': name, 'tid': None}) + + class AttrDict(dict): + pass # native dict() refuses setattr call + + def handle_stack(self, pid, tid, time, stack, kind='sampling'): + use_lanes = False + + tasks = self.tasks_from_samples.setdefault(kind, {}).setdefault(pid, {}).setdefault(tid, self.AttrDict()) + tasks.last_stack_time = time + to_remove = [] + + if not use_lanes: + pid = -pid if pid > 100 else pid + tid = -tid + + # Find currently present tasks: + present = set() + for frame in stack: + ptr = frame['ptr'] + if not frame['str']: + frame['str'] = '0x%x' % ptr + else: + frame['str'] = '%s(0x%x)' % (frame['str'], ptr) + present.add(ptr) + + # Remove currently absent tasks (they are finished): + for ptr in tasks: + if ptr not in present: + to_remove.append(ptr) + + to_add = [] + # Find affected tasks, those to the right of most recent of removed. 
These affected are to be 'restarted' + if to_remove: + leftmost_time = min(tasks[ptr]['begin'] for ptr in to_remove) + for ptr, task in tasks.items(): + if task['begin'] > leftmost_time and ptr not in to_remove: + to_remove.append(ptr) + to_add.append(task.copy()) + + # Actual removal of the tasks with flushing them to timeline: + to_remove.sort(key=lambda ptr: tasks[ptr]['begin']) + shift = 1 + if use_lanes: + lane = self.process(pid).thread(tid).lane(kind) #TODO: implement proper lane frames + else: + thread = self.process(pid).thread(tid) + for ptr in to_remove: + task = tasks[ptr] + end_time = time - TIME_SHIFT_FOR_GT * shift + if end_time <= task['begin']: # this might happen on finalization and with very deep stack + continue + args = {'module': task['module'].replace('\\', '/')} + if '__file__' in task and '__line__' in task: + args.update({ + 'pos': '%s(%d)' % (task['__file__'], int(task['__line__'])) + }) + if use_lanes: + lane.frame_begin( + task['begin'], task['str'], args=args, meta={'sampled': True} + ).end(end_time) + else: + if kind in ['sampling', 'ustack'] or (pid == 0 and kind == 'kstack'): # temporary workaround for OSX case where there are three stacks + thread.task(task['str']).begin(task['begin'], args=args, meta={'sampled': True}).end(end_time) + del tasks[ptr] + shift += 1 + + # pre-sort restarted tasks by their initial time to keep natural order + to_add.sort(key=lambda task: task['begin']) + + # Add new tasks to the end of the list + for frame in reversed(stack): # Frames originally come in reverse order [bar, foo, main] + if frame['ptr'] not in tasks: + to_add.append(frame.copy()) + + # Actual adding of tasks: + shift = 1 + for task in to_add: + task['begin'] = time + TIME_SHIFT_FOR_GT * shift + tasks[task['ptr']] = task + shift += 1 + + for callback in self.callbacks + self.stack_sniffers: + callback.handle_stack({'pid': pid, 'tid': tid, 'time': time}, stack, kind) + + + +# example: +# +# the_thread = callbacks.process(-1).thread(-1) +# counter = the_thread.counter(domain='mydomain', name='countername') +# for i in range(5): +# counter.set_value(time_stamp=%timestamp%, value=i) +# task = the_thread.task('MY_TASK') # same with frames +# for i in range(7): +# task.begin(%timestamp%) +# task.add_args({'a':1, 'b':'2'}) +# task.end(%timestamp%) + +# FIXME: doesn't belong this file, move to 'SEA reader' or something + + +class FileWrapper: + def __init__(self, path, args, tree, domain, tid): + self.args = args + self.tree = tree + self.domain = domain + self.tid = tid + self.next_wrapper = None + self.file = open(path, "rb") + self.record = self.read() + + def __del__(self): + self.file.close() + + def next(self): + self.record = self.read() + + def get_record(self): + return self.record + + def get_pos(self): + return self.file.tell() + + def get_size(self): + return os.path.getsize(self.file.name) + + def get_path(self): + return self.file.name + + def read(self): + call = {"tid": self.tid, "pid": self.tree["pid"], "domain": self.domain} + + tuple = read_chunk_header(self.file) + if tuple == (0, 0, 0): # mem mapping wasn't trimmed on close, zero padding goes further + return None + call["time"] = tuple[0] + + assert (tuple[1] < len(TaskTypes)) # sanity check + call["type"] = tuple[1] + + flags = tuple[2] + if flags & 0x1: # has id + chunk = self.file.read(2 * 8) + call["id"] = struct.unpack('QQ', chunk)[0] + if flags & 0x2: # has parent + chunk = self.file.read(2 * 8) + call["parent"] = struct.unpack('QQ', chunk)[0] + if flags & 0x4: # has string + chunk = 
self.file.read(8) + str_id = struct.unpack('Q', chunk)[0] # string handle + call["str"] = self.tree["strings"][str_id] + if flags & 0x8: # has tid, that differs from the calling thread (virtual tracks) + chunk = self.file.read(8) + call["tid"] = int(struct.unpack('q', chunk)[0]) + + if flags & 0x10: # has data + chunk = self.file.read(8) + length = struct.unpack('Q', chunk)[0] + call["data"] = self.file.read(length).decode() + + if flags & 0x20: # has delta + chunk = self.file.read(8) + call["delta"] = struct.unpack('d', chunk)[0] + + if flags & 0x40: # has pointer + chunk = self.file.read(8) + ptr = struct.unpack('Q', chunk)[0] + if not resolve_pointer(self.args, self.tree, ptr, call): + call["pointer"] = ptr + + if flags & 0x80: # has pseudo pid + chunk = self.file.read(8) + call["pid"] = struct.unpack('q', chunk)[0] + + return call + + def set_next(self, wrapper): + self.next_wrapper = wrapper + + def get_next(self): + return self.next_wrapper + + +def transform2(args, tree, skip_fn=None): + with Callbacks(args, tree) as callbacks: + if callbacks.is_empty(): + return callbacks.get_result() + + wrappers = {} + for domain, content in tree["domains"].items(): # go thru domains + for tid, path in content["files"]: # go thru per thread files + parts = split_filename(path) + + file_wrapper = FileWrapper(path, args, tree, domain, tid) + if file_wrapper.get_record(): # record is None if something wrong with file reading + wrappers.setdefault(parts['dir'] + '/' + parts['name'], []).append(file_wrapper) + + for unordered in wrappers.values(): # chain wrappers by time + ordered = sorted(unordered, key=lambda wrapper: wrapper.get_record()['time']) + prev = None + for wrapper in ordered: + if prev: + prev.set_next(wrapper) + prev = wrapper + + files = [] + for unordered in wrappers.values(): + for wrapper in unordered: + next = wrapper.get_next() + if skip_fn and skip_fn(wrapper.get_path()): # for "cut" support + continue + files.append(wrapper) + + if verbose_level() > verbose_level('warning'): + progress = DummyWith() + else: + size = sum([file.get_size() for file in files]) + progress = Progress(size, 50, 'Converting: %s (%s)' % (os.path.basename(args.input), format_bytes(size))) + + with progress: + count = 0 + while True: # records iteration + record = None + earliest = None + for file in files: + rec = file.get_record() + if not rec: # finished + continue + if not record or rec['time'] < record['time']: + record = rec + earliest = file + if not record: # all finished + break + earliest.next() + + if message('info', "%d\t%s\t%s" % (count, TaskTypes[record['type']], record)): + pass + elif count % ProgressConst == 0: + progress.tick(sum([file.get_pos() for file in files])) + callbacks.on_event(TaskTypes[record['type']], record) + count += 1 + + callbacks("metadata_add", {'domain': 'IntelSEAPI', 'str': '__process__', 'pid': tree["pid"], 'tid': -1, 'delta': -1}) + for pid, name in tree['groups'].items(): + callbacks.set_process_name(tree["pid"], name) + + return callbacks.get_result() + + +# FIXME: doesn't belong this file, move to 'utils' + +def get_module_by_ptr(tree, ptr): + keys = list(tree['modules'].keys()) + keys.sort() # looking for first bigger the address, previous is the module we search for + item = keys[0] + for key in keys[1:]: + if key > ptr: + break + item = key + module = tree['modules'][item] + if item < ptr < item + int(module[1]): + return item, module[0] + else: + return None, None + + +def win_parse_symbols(symbols): + sym = [] + for line in symbols.split('\n'): + line = 
line.strip() + if not line: + continue + if '\t' in line: + parts = line.strip().split('\t') + addr, size, name = parts[:3] + if int(size): + sym.append({'addr': int(addr), 'size': int(size), 'name': name}) + if len(parts) == 4: + sym[-1].update({'pos': parts[3]}) + sym.sort(key=lambda data: data['addr']) + return sym + + +def win_resolve(symbols, addr): + idx = bisect_right(symbols, addr, lambda data: data['addr']) - 1 + if idx > -1: + sym = symbols[idx] + if sym['addr'] <= addr <= (sym['addr'] + sym['size']): + return (sym['pos'] + '\n' + sym['name']) if 'pos' in sym else sym['name'] + return '' + + +def resolve_cmd(args, path, load_addr, ptr, cache={}): + if sys.platform == 'win32': + if path.startswith('\\'): + path = 'c:' + path + if path.lower() in cache: + return win_resolve(cache[path.lower()], ptr - load_addr) + bitness = '32' if '32' in platform.architecture()[0] else '64' + executable = os.path.sep.join([args.bindir, 'TestIntelSEAPI%s.exe' % bitness]) + cmd = '"%s" "%s"' % (executable, path) + elif sys.platform == 'darwin': + cmd = 'atos -o "%s" -l %s %s' % (path, to_hex(load_addr), to_hex(ptr)) + elif 'linux' in sys.platform: + cmd = 'addr2line %s -e "%s" -i -p -f -C' % (to_hex(ptr), path) + else: + assert (not "Unsupported platform!") + + env = dict(os.environ) + if "INTEL_SEA_VERBOSE" in env: + del env["INTEL_SEA_VERBOSE"] + + try: + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + (symbol, err) = proc.communicate() + except IOError: + err = traceback.format_exc() + import gc + gc.collect() + print("gc.collect()") + except: + err = traceback.format_exc() + if err: + print(cmd) + print(err) + return '' + + if sys.platform == 'win32': + cache[path.lower()] = win_parse_symbols(symbol.decode()) + return win_resolve(cache[path.lower()], ptr - load_addr) + return symbol + + +# finds first bigger +def bisect_right(array, value, key=lambda item: item): #upper_bound, dichotomy, binary search + lo = 0 + hi = len(array) + while lo < hi: + mid = (lo + hi) // 2 + if value < key(array[mid]): + hi = mid + else: + lo = mid + 1 + return lo + + +def resolve_jit(tree, ptr, cache): + if 'jit' not in tree: + return False + jit = tree['jit'] + if jit['start'] <= ptr <= jit['end']: + jit_data = jit['data'] + idx = bisect_right(jit_data, ptr, lambda item: item['addr']) - 1 + if idx > -1: + offset = ptr - jit_data[idx]['addr'] + if offset > jit_data[idx]['size']: + return False + cache[ptr] = {'module': 'jit'} + cache[ptr]['str'] = jit_data[idx]['name'] + if not cache[ptr]['str']: + cache[ptr]['str'] = 'jit_method_%d' % jit_data[idx]['id'] + cache[ptr]['__file__'] = jit_data[idx]['file'] + lines = jit_data[idx]['lines'] + idx = bisect_right(lines, offset, lambda item: item[0]) - 1 + if idx > -1: + cache[ptr]['__line__'] = lines[idx][1] + return True + else: + return False + + +def resolve_pointer(args, tree, ptr, call, cache={}): + if ptr not in cache: + if not resolve_jit(tree, ptr, cache): + (load_addr, path) = get_module_by_ptr(tree, ptr) + if path is None or not os.path.exists(path): + cache[ptr] = None + else: + symbol = resolve_cmd(args, path, load_addr, ptr) + cache[ptr] = {'module': path} + lines = symbol.splitlines() + if lines: + if sys.platform == 'win32': + if len(lines) == 1: + cache[ptr]['str'] = lines[0] + elif len(lines) == 2: + cache[ptr]['str'] = lines[1] + (cache[ptr]['__file__'], cache[ptr]['__line__']) = lines[0].rstrip(")").rsplit("(", 1) + elif sys.platform == 'darwin': + if '(in' in lines[0]: + parts = lines[0].split(" (in ") 
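# Illustrative sketch only (not part of the patch): how the bisect_right helper defined above
# is used by win_resolve and resolve_jit — it returns the index of the first entry whose key is
# strictly greater than the value, so index - 1 is the entry enclosing the address.
# The symbol table below is hypothetical.
symbols = [{'addr': 0, 'size': 50}, {'addr': 100, 'size': 40}, {'addr': 200, 'size': 30}]
idx = bisect_right(symbols, 130, lambda item: item['addr']) - 1
assert symbols[idx]['addr'] == 100 and symbols[idx]['addr'] + symbols[idx]['size'] >= 130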
+ cache[ptr]['str'] = parts[0] + if ') (' in parts[1]: + (cache[ptr]['__file__'], cache[ptr]['__line__']) = parts[1].split(') (')[1].split(':') + cache[ptr]['__line__'] = cache[ptr]['__line__'].strip(')') + else: + if ' at ' in lines[0]: + (cache[ptr]['str'], fileline) = lines[0].split(' at ') + (cache[ptr]['__file__'], cache[ptr]['__line__']) = fileline.strip().split(':') + if not cache[ptr] or 'str' not in cache[ptr]: + return False + call.update(cache[ptr]) + return True + + +def resolve_stack(args, tree, data): + if tree['process']['bits'] == 64: + frames = struct.unpack('Q' * (len(data) / 8), data) + else: + frames = struct.unpack('I' * (len(data) / 4), data) + stack = [] + for frame in frames: + res = {'ptr': frame} + if resolve_pointer(args, tree, frame, res): + stack.append(res) + return stack + + +def attachme(): + print("Attach me!") + while not sys.gettrace(): + pass + import time + time.sleep(1) + + +class TaskCombiner: + not_implemented_err_string = 'You must implement this method in the TaskCombiner derived class!' + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.finish() + return False + + def __init__(self, args, tree): + self.tree = tree + self.args = args + self.event_map = {} + self.events = [] + + (self.source_scale_start, self.target_scale_start, self.ratio) = tuple([0, 0, 1. / 1000]) # nanoseconds to microseconds + + def get_targets(self): + """Returns list with the paths to output files.""" + raise NotImplementedError(TaskCombiner.not_implemented_err_string) + + def complete_task(self, type, begin, end): + """ + Handles task to the derived class output format. + + Args: + type: Task type. + begin: Dictionary with task begin data. + end: Dictionary with task end data. + """ + raise NotImplementedError(TaskCombiner.not_implemented_err_string) + + def finish(self): + """Called to finalize a derived class.""" + raise NotImplementedError(TaskCombiner.not_implemented_err_string) + + def convert_time(self, time): + return (time - self.source_scale_start) * self.ratio + self.target_scale_start + + def global_metadata(self, data): + pass + + def relation(self, data, head, tail): + pass + + def handle_stack(self, task, stack, name='stack'): + pass + + def context_switch(self, time, cpu, prev, next): + """ + Called to process context switch events on CPU. + :param cpu: CPU number (int) + :param prev: previous task description (dict. Example: {'tid': 2935135, 'state': 'S', 'name': u'vtplay', 'prio': 31}) + :param next: next task description (dict. see above) + """ + pass + + def wakeup(self, time, cpu, prev, next): + """ + Called to process thread wakup events on CPU. + :param cpu: CPU on which the event occurred + :param prev: currently running process description for CPU (dict. Example: {'tid': 123, 'name': 'kuku', 'pid': 12}) + :param next: thread being woken up (dict. 
see above) + """ + pass + + +def to_hex(value): + return "0x" + hex(value).rstrip('L').replace("0x", "").upper() + + +def get_name(begin): + if 'str' in begin: + return begin['str'] + elif 'pointer' in begin: + return "func<" + to_hex(begin['pointer']) + ">" + else: + return "" + + +def get_filter_path(): + filter = os.environ.get('INTEL_SEA_FILTER') + if filter: + filter = subst_env_vars(filter) + else: + filter = os.path.join(UserProfile, '.isea_domains.fltr') + return filter + + +def is_domain_enabled(domain, default=True): + domains = global_storage('sea.is_domain_enabled', {}) + if not domains: + filter = get_filter_path() + try: + with open(filter) as file: + for line in file: + enabled = not line.startswith('#') + name = line.strip(' #\n\r') + domains[name] = enabled + if not enabled: + message('warning', 'The domain "%s" is disabled in %s' % (name, filter)) + except IOError: + pass + if domain not in domains: + domains[domain] = default + return domains[domain] + + +def save_domains(): + domains = global_storage('sea.is_domain_enabled', {}) + + filter = get_filter_path() + print("Saving domains:", filter) + + with open(filter, 'w') as file: + for key, value in domains.items(): + file.write('%s%s\n' % ('#' if not value else '', key)) + + +class GraphCombiner(TaskCombiner): + def __init__(self, args, tree): + TaskCombiner.__init__(self, args, tree) + self.args = args + self.per_domain = {} + self.relations = {} + self.threads = set() + self.per_thread = {} + + @staticmethod + def get_name_ex(begin): + name = get_name(begin) + if ':' in name: + parts = name.split(':') + if parts[1].isdigit(): + return parts[0] + return name + + def get_per_domain(self, domain): + return self.per_domain.setdefault(domain, { + 'counters': {}, 'objects': {}, 'frames': {}, 'tasks': {}, 'markers': {}, 'threads': {} + }) + + def complete_task(self, type, begin, end): + if 'sampled' in begin and begin['sampled']: + return + tid = begin['tid'] if 'tid' in begin else None + self.threads.add(tid) + domain = self.get_per_domain(begin['domain']) + if type == 'task': + task = domain['tasks'].setdefault(self.get_name_ex(begin), {'time': []}) + task['time'].append(end['time'] - begin['time']) + if '__file__' in begin: + task['src'] = begin['__file__'] + ":" + begin['__line__'] + + if begin['type'] == 0: # non-overlapped only + # We expect parents to be reported in the end order (when the end time becomes known) + orphans = self.per_thread.setdefault(begin['tid'], []) + left_index = bisect_right(orphans, begin['time'], lambda orphan: orphan[0]['time']) # first possible child + right_index = bisect_right(orphans, end['time'], lambda orphan: orphan[0]['time']) - 1 # last possible child + for i in range(right_index, left_index - 1, -1): # right to left to be able deleting from array + orphan = orphans[i] + if orphan[1]['time'] < end['time']: # a parent is found! 
+ self.add_relation({ + 'label': 'calls', 'from': self.make_id(begin['domain'], self.get_name_ex(begin)), + 'to': self.make_id(orphan[0]['domain'], self.get_name_ex(orphan[0]))}) + del orphans[i] + orphans.insert(left_index, (begin, end)) + else: + self.add_relation({'label': 'executes', 'from': self.make_id("threads", str(tid)), + 'to': self.make_id(begin['domain'], self.get_name_ex(begin)), 'color': 'gray'}) + elif type == 'marker': + domain['markers'].setdefault(begin['str'], []) + elif type == 'frame': + pass + elif type == 'counter': + if 'delta' in begin: + domain['counters'].setdefault(begin['str'], []).append(begin['delta']) + else: + return # TODO: add multi-value support + elif 'object' in type: + if 'snapshot' in type: + return + objects = domain['objects'].setdefault(begin['str'], {}) + object = objects.setdefault(begin['id'], {}) + if 'new' in type: + object['create'] = begin['time'] + elif 'delete' in type: + object['destroy'] = begin['time'] + else: + message('message', "Unhandled: " + type) + + def finish(self): + for tid, orphans in self.per_thread.items(): + last_time = 0 + for orphan in orphans: + if (orphan[1]['time'] < last_time): + print("FIXME: orphan[1]['time'] < last_time") + last_time = orphan[1]['time'] + begin = orphan[0] + self.add_relation({'label': 'executes', 'from': self.make_id("threads", str(tid)), + 'to': self.make_id(begin['domain'], self.get_name_ex(begin)), 'color': 'gray'}) + + @staticmethod + def make_id(domain, name): + import re + res = "%s_%s" % (domain, name) + return re.sub("[^a-z0-9]", "_", res.lower()) + + def relation(self, data, head, tail): + if head and tail: + self.add_relation({'label': self.get_name_ex(data), 'from': self.make_id(head['domain'], self.get_name_ex(head)), 'to': self.make_id(tail['domain'], self.get_name_ex(tail)), 'color': 'red'}) + + def add_relation(self, relation): + key = frozenset(relation.items()) + if key in self.relations: + return + self.relations[key] = relation + + def handle_stack(self, task, stack, name='stack'): + tid = abs(task['tid']) if 'tid' in task else None + self.threads.add(tid) + parent = None + for frame in reversed(stack): + domain = self.get_per_domain(frame['module']) + name = frame['str'].split('+')[0] + domain['tasks'].setdefault(name, {'time': [0]}) + if parent: + self.add_relation({'label': 'calls', 'from': self.make_id(parent['module'], parent['name']), 'to': self.make_id(frame['module'], name)}) + else: + self.add_relation({'label': 'executes', 'from': self.make_id("threads", str(tid)), 'to': self.make_id(frame['module'], name), 'color': 'gray'}) + parent = frame.copy() + parent.update({'name': name}) + + +class Collector: + def __init__(self, args): + self.args = args + + @classmethod + def set_output(cls, output): # has to be object supporting 'write' method + global_storage('log')['file'] = output + + @classmethod + def get_output(cls, statics = {}): + log = global_storage('log') + if not log: + args = get_args() + log_name = datetime.now().strftime('sea_%Y_%m_%d__%H_%M_%S.log') + if args: + log_path = subst_env_vars(args.output) + if os.path.isfile(log_path): + log_path = os.path.dirname(log_path) + ensure_dir(log_path, False) + if 'tempfile' in statics: + statics['tempfile'].close() + if os.path.dirname(statics['tempfile'].name) != log_path: + shutil.copy(statics['tempfile'].name, log_path) + del statics['tempfile'] + else: + if 'tempfile' in statics: + return statics['tempfile'] + log_path = (tempfile.gettempdir() if sys.platform == 'win32' else '/tmp') + log_file = os.path.join( + 
log_path, + log_name + ) + print("For execution details see:", log_file) + if args: + cls.set_output(open(log_file, 'a')) + else: + statics['tempfile'] = open(log_file, 'a') + return statics['tempfile'] + return log['file'] + + @classmethod + def log(cls, msg, stack=False): + assert type(stack) is bool # to avoid "log" function being misused as "print" where comma allows more args + msg = msg.strip() + cut = '\n' + '-' * 100 + '\n' + msg = cut + msg + '\n\n' + (''.join(traceback.format_stack()[:-1]) if stack else '') + cut + output = cls.get_output() + output.write(msg + '\n') + output.flush() + + @classmethod + def execute(cls, cmd, log=True, **kwargs): + start_time = time.time() + if 'stdout' not in kwargs: + kwargs['stdout'] = subprocess.PIPE + if 'stderr' not in kwargs: + kwargs['stderr'] = subprocess.PIPE + if 'env' not in kwargs: + kwargs['env'] = get_original_env() + if sys.version[0] == '3': + kwargs['encoding'] = 'utf8' + + (out, err) = subprocess.Popen(cmd, shell=True, **kwargs).communicate() + if log: + cls.log("\ncmd:\t%s:\nout:\t%s\nerr:\t%s\ntime: %s" % (cmd, str(out).strip(), str(err).strip(), str(timedelta(seconds=(time.time() - start_time)))), stack=True if err else False) + if verbose_level() == verbose_level('info'): + print("\n\n -= '%s' output =- {\n" % cmd) + print(out.strip() if out else '') + print("\n", "-" * 50, "\n") + print(err.strip() if err else '') + print("\n}\n\n") + return out, err + + @classmethod + def execute_detached(cls, cmd, **kwargs): + cls.log("\nDetached:\t%s" % cmd) + if sys.platform == 'win32': + DETACHED_PROCESS = 0x00000008 + CREATE_NEW_PROCESS_GROUP = 0x00000200 + CREATE_NO_WINDOW = 0x08000000 + info = subprocess.STARTUPINFO() + info.dwFlags = subprocess.STARTF_USESHOWWINDOW + info.wShowWindow = 0 # SW_HIDE + subprocess.Popen(cmd, shell=True, startupinfo=info, stdin=None, stdout=None, stderr=None, creationflags=(CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP), **kwargs) + else: + subprocess.Popen(cmd, shell=True, stdin=None, stdout=None, stderr=None, **kwargs) + + def start(self): + raise NotImplementedError('Collector.start is not implemented!') + + def stop(self, wait=True): + raise NotImplementedError('Collector.stop is not implemented!') + + @classmethod + def detect_instances(cls, what): + instances = [] + cmd = 'where' if sys.platform == 'win32' else 'which' + (out, err) = cls.execute('%s %s' % (cmd, what)) + out = out.decode() if hasattr(out, 'decode') else out + if err: + return instances + for line in out.split('\n'): + line = line.strip() + if line: + instances.append(line) + return instances + + +if __name__ == "__main__": + start_time = time.time() + main() + elapsed = time.time() - start_time + print("Time Elapsed:", str(timedelta(seconds=elapsed)).split('.')[0]) diff --git a/thirdparty/itt_collector/sea_itt_lib/CMakeLists.txt b/thirdparty/itt_collector/sea_itt_lib/CMakeLists.txt new file mode 100644 index 00000000000000..b3a83db2cebb3e --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/CMakeLists.txt @@ -0,0 +1,48 @@ +# ****************************************************************************** +# Copyright 2017-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ****************************************************************************** + +set(TARGET_NAME sea_itt_lib) + +set(CMAKE_DEBUG_POSTFIX "") +set(CMAKE_RELEASE_POSTFIX "") + +if (WIN32) + add_custom_command(OUTPUT "${PROJECT_BINARY_DIR}/IntelSEAPI.rc" "${PROJECT_BINARY_DIR}/IntelSEAPI.h" + PRE_BUILD + COMMAND mc -um ${CMAKE_CURRENT_SOURCE_DIR}/IntelSEAPI.man -h ${PROJECT_BINARY_DIR} -r ${PROJECT_BINARY_DIR} + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/IntelSEAPI.man + MAIN_DEPENDENCY IttNotifyStdSrc.h + COMMENT "Generating ${PROJECT_BINARY_DIR}/IntelSEAPI.rc ${PROJECT_BINARY_DIR}/IntelSEAPI.h" + ) +endif() +file(GLOB_RECURSE SOURCES "*.cpp" "*.h") + +add_library(${TARGET_NAME} SHARED ${SOURCES}) + +target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + ${PROJECT_BINARY_DIR}) +set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME IntelSEAPI) + +target_link_libraries(${TARGET_NAME} PRIVATE ittnotify) + +if(UNIX) + target_link_libraries(${TARGET_NAME} PRIVATE dl) + target_compile_options(${TARGET_NAME} PRIVATE -Wno-undef -Wno-deprecated-declarations -Wno-multichar) +elseif(WIN32) + target_link_libraries(${TARGET_NAME} PRIVATE Dbghelp) +endif() + +add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) diff --git a/thirdparty/itt_collector/sea_itt_lib/IntelSEAPI.man b/thirdparty/itt_collector/sea_itt_lib/IntelSEAPI.man new file mode 100644 index 00000000000000..042203d6610713 Binary files /dev/null and b/thirdparty/itt_collector/sea_itt_lib/IntelSEAPI.man differ diff --git a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp new file mode 100644 index 00000000000000..890c084d4edce5 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp @@ -0,0 +1,1892 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#include "IttNotifyStdSrc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 + #include + #include +#else + #include + #include +#endif + +#ifdef __APPLE__ + //#define __APPLE_API_UNSTABLE + #include + #include +#endif + +namespace sea { +IHandler* g_handlers[MAX_HANDLERS] = {}; //10 is more than enough for now + +CIttLocker::CIttLocker() { + m_pGlobal = GetITTGlobal(); + __itt_mutex_lock(&m_pGlobal->mutex); +} + + +CIttLocker::~CIttLocker() { + if (m_pGlobal) { + __itt_mutex_unlock(&m_pGlobal->mutex); + } +} + +} // namespace sea + +//FIXME: in general add much more comments + +std::map g_stats; //can't be static function variable due to lifetime limits + +class CIttFnStat { +public: + CIttFnStat(const char* name) { + if (!sea::IsVerboseMode()) return; + sea::CIttLocker locker; + ++GetStats()[name]; + } + + static std::map& GetStats() { + return g_stats; + } +}; + +#ifdef _DEBUG + #define ITT_FUNCTION_STAT() CIttFnStat oIttFnStat(__FUNCTION__) +#else + #define ITT_FUNCTION_STAT() +#endif + + +struct __itt_frame_t { + __itt_domain* pDomain; + __itt_id id; +}; + +inline bool operator < (const __itt_id& left, const __itt_id& right) { + return memcmp(&left, &right, sizeof(__itt_id)) < 0; +} + +inline bool operator == (const __itt_id& left, const __itt_id& right) { + return (left.d1 == right.d1) && (left.d2 == right.d2); +} + +namespace sea { + +int64_t g_nRingBuffer = 1000000000ll * atoi(get_environ_value("INTEL_SEA_RING").c_str()); //in nanoseconds +uint64_t g_nAutoCut = 1024ull * 1024 * atoi(get_environ_value("INTEL_SEA_AUTOCUT").c_str()); //in MB +uint64_t g_features = sea::GetFeatureSet(); + +class DomainFilter { +protected: + std::string m_path; + typedef std::map TDomains; + TDomains m_domains; + + void ReadFilters(TDomains& domains) { + std::ifstream ifs(m_path); + for (std::string domain; std::getline(ifs, domain);) { + if (domain[0] == '#') + m_domains[domain.c_str() + 1] = true; + else + m_domains[domain] = false; + } + } + +public: + DomainFilter() { + m_path = get_environ_value("INTEL_SEA_FILTER"); + if (m_path.empty()) return; + ReadFilters(m_domains); + } + + operator bool() const { + return !m_path.empty(); + } + + bool IsEnabled(const char* szDomain) { + return !m_domains[szDomain]; //new domain gets initialized with bool() which is false, so we invert it + } + + void Finish() { + if (m_path.empty()) return; + TDomains domains; + ReadFilters(domains); + 
domains.insert(m_domains.begin(), m_domains.end()); + m_domains.swap(domains); + + std::ofstream ifs(m_path); + for (const auto& pair : m_domains) { + if (pair.second) + ifs << '#'; + ifs << pair.first << std::endl; + } + } +} g_oDomainFilter; + +bool PathExists(const std::string& path) { +#ifdef _WIN32 + return -1 != _access(path.c_str(), 0); +#else + return -1 != access(path.c_str(), F_OK); +#endif +} + +int mkpath(const char *path, uint32_t mode) { + struct stat sb = {}; + + if (!stat(path, &sb)) + return 0; + + char parent[1024] = {}; +#ifdef _WIN32 + strcpy_s(parent, path); +#else + strcpy(parent, path); // NOLINT +#endif + char* last_slash = strrchr(parent, '//'); + if (!last_slash) { + VerbosePrint("Invalid dir: %s\n", parent); + return -1; + } + *last_slash = 0; + + int res = mkpath(parent, mode); + if (res == -1) { + VerbosePrint("Failed to create dir: %s err=%d\n", parent, errno); + return res; + } else { + VerbosePrint("Created dir: %s\n", parent); + } + +#ifdef _WIN32 + return _mkdir(path); +#else + return mkdir(path, mode); +#endif +} + +std::string GetDir(std::string path, const std::string& append) { + if (path.empty()) return path; + path += append; + VerbosePrint("GetDir: %s\n", path.c_str()); + + std::replace(path.begin(), path.end(), '\\', '/'); + char lastSym = path[path.size() - 1]; + if (lastSym != '/') + path += "/"; + + std::string dir_name = path.substr(0, path.length() - 1); + mkpath(dir_name.c_str(), FilePermissions); + return path; +} + +std::string GetSavePath() { + static std::string save_to = get_environ_value("INTEL_SEA_SAVE_TO"); + VerbosePrint("Got save path: %s\n", save_to.c_str()); + if (save_to.empty()) { + return save_to; + } + return GetDir(save_to, + ("-" + std::to_string(CTraceEventFormat::GetRegularFields().pid))); +} + +bool IsVerboseMode() { + static bool bVerboseMode = !get_environ_value("INTEL_SEA_VERBOSE").empty(); + return bVerboseMode; +} + +std::string g_savepath = GetSavePath(); // NOLINT +std::shared_ptr g_spCutName; + +std::string Escape4Path(std::string str) { + std::replace_if(str.begin(), str.end(), + [](char sym){return strchr("/\\:*?\"<>|", sym);}, + '_'); + return str; +} + +void InitDomain(__itt_domain* pDomain) { + CIttLocker locker; + pDomain->extra2 = new DomainExtra{}; + if (g_savepath.size()) { + DomainExtra* pDomainExtra = reinterpret_cast(pDomain->extra2); + pDomainExtra->strDomainPath = GetDir(g_savepath, Escape4Path(pDomain->nameA)); + pDomainExtra->bHasDomainPath = !pDomainExtra->strDomainPath.empty(); + } + + if (!g_oDomainFilter) + return; + pDomain->flags = g_oDomainFilter.IsEnabled(pDomain->nameA) ? 
1 : 0; +} + +SThreadRecord* GetThreadRecord() { + static thread_local SThreadRecord* pThreadRecord = nullptr; + if (pThreadRecord) + return pThreadRecord; + + CIttLocker lock; + + pThreadRecord = new SThreadRecord{}; + static __itt_global* pGlobal = GetITTGlobal(); + + __itt_domain* pDomain = pGlobal->domain_list; + DomainExtra* pDomainExtra = reinterpret_cast(pDomain->extra2); + SThreadRecord* pRecord = pDomainExtra->pThreadRecords; + if (pRecord) { + while (pRecord->pNext) + pRecord = pRecord->pNext; + pRecord->pNext = pThreadRecord; + } else { + pDomainExtra->pThreadRecords = pThreadRecord; + } + + return pThreadRecord; +} + +void UNICODE_AGNOSTIC(thread_set_name)(const char* name) { + ITT_FUNCTION_STAT(); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->SetThreadName(GetRegularFields(), name); + } + +#if defined(__APPLE__) + pthread_setname_np(name); +#elif defined(__linux__) + pthread_setname_np(pthread_self(), name); +#endif +} +#ifdef _WIN32 +void thread_set_nameW(const wchar_t* name) { + UNICODE_AGNOSTIC(thread_set_name)(W2L(name).c_str()); +} +#endif + + + +inline uint64_t ConvertClockDomains(unsigned long long timestamp, __itt_clock_domain* pClock) { + if (!pClock) return timestamp; + uint64_t start = *(uint64_t*)pClock->extra2; // NOLINT + return start + (timestamp - pClock->info.clock_base) * SHiResClock::period::den / pClock->info.clock_freq; +} + +CTraceEventFormat::SRegularFields GetRegularFields(__itt_clock_domain* clock_domain, unsigned long long timestamp) { + CTraceEventFormat::SRegularFields rf = CTraceEventFormat::GetRegularFields(); + + __itt_track* pTrack = GetThreadRecord()->pTrack; + + if (pTrack) { + CTraceEventFormat::SRegularFields& trackRF = *(CTraceEventFormat::SRegularFields*)pTrack->extra2; + rf.changed |= (rf.pid != trackRF.pid) ? CTraceEventFormat::SRegularFields::ecPid : CTraceEventFormat::SRegularFields::ecNothing; + rf.pid = trackRF.pid; + rf.changed |= (rf.tid != trackRF.tid) ? 
CTraceEventFormat::SRegularFields::ecTid : CTraceEventFormat::SRegularFields::ecNothing; + rf.tid = trackRF.tid; + } + if (clock_domain || timestamp) { + rf.nanoseconds = ConvertClockDomains(timestamp, clock_domain); + rf.changed |= CTraceEventFormat::SRegularFields::ecTime; + } + return rf; +} + + +__itt_domain* UNICODE_AGNOSTIC(domain_create)(const char* name) { + ITT_FUNCTION_STAT(); + __itt_domain *h_tail = NULL, *h = NULL; + + if (name == NULL) { + return NULL; + } + { + CIttLocker locker; + static __itt_global* pGlobal = GetITTGlobal(); + for (h_tail = NULL, h = pGlobal->domain_list; h != NULL; h_tail = h, h = h->next) { + if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; + } + if (h == NULL) { + NEW_DOMAIN_A(pGlobal, h, h_tail, name); + } + } + InitDomain(h); + return h; +} + +#ifdef _WIN32 +__itt_domain* domain_createW(const wchar_t* name) { + return UNICODE_AGNOSTIC(domain_create)(W2L(name).c_str()); +} +#endif + +inline __itt_string_handle* get_tail_of_global_string_list(const __itt_global* const pGlobal) { + if (!pGlobal->string_list) return nullptr; + + __itt_string_handle* result = pGlobal->string_list; + + while (result->next) { + result = result->next; + } + + return result; +} + +inline __itt_string_handle* create_and_add_string_handle_to_list(const char* name) { + static __itt_global* pGlobal = GetITTGlobal(); + static __itt_string_handle *string_handle_list_tail = get_tail_of_global_string_list(pGlobal); + + __itt_string_handle *result = NULL; + + NEW_STRING_HANDLE_A(pGlobal, result, string_handle_list_tail, name); + string_handle_list_tail = result; + return result; +} + +__itt_string_handle* ITTAPI UNICODE_AGNOSTIC(string_handle_create)(const char* name) { + ITT_FUNCTION_STAT(); + if (name == NULL) { + return NULL; + } + CIttLocker locker; + static std::unordered_map handle_map; + auto found_handle = handle_map.find(name); + if (found_handle != handle_map.end()) { + return found_handle->second; + } + + __itt_string_handle *result = create_and_add_string_handle_to_list(name); + handle_map[name] = result; + sea::ReportString(result); + return result; +} + +#ifdef _WIN32 +__itt_string_handle* string_handle_createW(const wchar_t* name) { + return UNICODE_AGNOSTIC(string_handle_create)(W2L(name).c_str()); +} +#endif + +void marker_ex(const __itt_domain *pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id id, + __itt_string_handle *pName, + __itt_scope scope) { + ITT_FUNCTION_STAT(); + CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->Marker(rf, pDomain, id, pName, scope); + } +} + +void marker(const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pName, __itt_scope scope) { + ITT_FUNCTION_STAT(); + marker_ex(pDomain, nullptr, 0, id, pName, scope); +} + + +bool IHandler::RegisterHandler(IHandler* pHandler) { + for (size_t i = 0; i < MAX_HANDLERS; ++i) { + if (!g_handlers[i]) { + g_handlers[i] = pHandler; + pHandler->SetCookieIndex(i); + return true; + } + } + return false; +} + +//FIXME: Use one coding style, since itt functions are mapped, there's no problem with that +void task_begin(const __itt_domain *pDomain, __itt_id taskid, __itt_id parentid, __itt_string_handle *pName) { + ITT_FUNCTION_STAT(); + SThreadRecord* pThreadRecord = GetThreadRecord(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + pThreadRecord->pTask = placement_new(STaskDescriptor) { + pThreadRecord->pTask, //chaining 
the previous task inside + rf, + pDomain, pName, + taskid, parentid + }; // NOLINT + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); + } +} + +void task_begin_fn(const __itt_domain *pDomain, __itt_id taskid, __itt_id parentid, void* fn) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + SThreadRecord* pThreadRecord = GetThreadRecord(); + + pThreadRecord->pTask = placement_new(STaskDescriptor) { + pThreadRecord->pTask, //chaining the previous task inside + rf, + pDomain, nullptr, + taskid, parentid, + fn + }; // NOLINT + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); + } +} + +void task_end(const __itt_domain *pDomain) { + ITT_FUNCTION_STAT(); + + SThreadRecord* pThreadRecord = GetThreadRecord(); + const char* domain = pDomain->nameA; + if (!pThreadRecord->pTask) { + VerbosePrint("Uneven begin/end count for domain: %s\n", domain); + return; + } + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); //FIXME: get from begin except for time + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskEnd(*pThreadRecord->pTask, rf, false); + } + + STaskDescriptor* prev = pThreadRecord->pTask->prev; + placement_free(pThreadRecord->pTask); + pThreadRecord->pTask = prev; +} + +void Counter(const __itt_domain *pDomain, __itt_string_handle *pName, double value, __itt_clock_domain* clock_domain, unsigned long long timestamp) { + CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->Counter(rf, pDomain, pName, value); + } +} + +void counter_inc_delta_v3(const __itt_domain *pDomain, __itt_string_handle *pName, unsigned long long delta) { + ITT_FUNCTION_STAT(); + Counter(pDomain, pName, double(delta)); // NOLINT +} + +void FixCounter(__itt_counter_info_t* pCounter) { + pCounter->extra2 = new SDomainName{ + UNICODE_AGNOSTIC(domain_create)(pCounter->domainA), + UNICODE_AGNOSTIC(string_handle_create)(pCounter->nameA) + }; + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->CreateCounter(reinterpret_cast<__itt_counter>(pCounter)); + } +} + +__itt_counter ITTAPI UNICODE_AGNOSTIC(counter_create_typed)(const char *name, const char *domain, __itt_metadata_type type) { + ITT_FUNCTION_STAT(); + + if (!name || !domain) + return nullptr; + + VerbosePrint("%s: name=%s domain=%s type=%d\n", __FUNCTION__, name, domain, (int)type); // NOLINT + + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + CIttLocker locker; + __itt_global* pGlobal = GetITTGlobal(); + for (h_tail = NULL, h = pGlobal->counter_list; h != NULL; h_tail = h, h = h->next) { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) + break; + } + if (!h) { + NEW_COUNTER_A(pGlobal, h, h_tail, name, domain, type); + FixCounter(h); + } + + return (__itt_counter)h; +} + +#ifdef _WIN32 +__itt_counter counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) { + return UNICODE_AGNOSTIC(counter_create_typed)(W2L(name).c_str(), W2L(domain).c_str(), type); +} +#endif + +__itt_counter UNICODE_AGNOSTIC(counter_create)(const char *name, const char *domain) { + ITT_FUNCTION_STAT(); + return 
UNICODE_AGNOSTIC(counter_create_typed)(name, domain, __itt_metadata_double);
+}
+
+#ifdef _WIN32
+__itt_counter counter_createW(const wchar_t *name, const wchar_t *domain) {
+    return UNICODE_AGNOSTIC(counter_create)(W2L(name).c_str(), W2L(domain).c_str());
+}
+#endif
+
+template<class T>
+double Convert(void* ptr) {
+    return static_cast<double>(*reinterpret_cast<T*>(ptr));
+}
+typedef double(*FConvert)(void* ptr);
+
+//converters are indexed by __itt_metadata_type: unknown, u64, s64, u32, s32, u16, s16, float, double
+FConvert g_MetatypeFormatConverter[] = {
+    nullptr,
+    Convert<uint64_t>,
+    Convert<int64_t>,
+    Convert<uint32_t>,
+    Convert<int32_t>,
+    Convert<uint16_t>,
+    Convert<int16_t>,
+    Convert<float>,
+    Convert<double>,
+};
+
+void counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr) {
+    ITT_FUNCTION_STAT();
+    if (id->type < __itt_metadata_u64 || id->type > __itt_metadata_double) {
+        VerbosePrint("%s: weird type: %d stack: %s\n", __FUNCTION__, (int)id->type, GetStackString().c_str()); // NOLINT
+        return;
+    }
+    double val = g_MetatypeFormatConverter[id->type](value_ptr);
+    SDomainName* pDomainName = reinterpret_cast<SDomainName*>(id->extra2);
+    Counter(pDomainName->pDomain, pDomainName->pName, val, clock_domain, timestamp);
+}
+
+void counter_set_value(__itt_counter id, void *value_ptr) {
+    ITT_FUNCTION_STAT();
+    counter_set_value_ex(id, nullptr, 0, value_ptr);
+}
+
+void UNICODE_AGNOSTIC(sync_create)(void *addr, const char *objtype, const char *objname, int attribute) {
+    ITT_FUNCTION_STAT();
+
+    std::string name((attribute == __itt_attr_mutex) ? "mutex:" : "barrier:");
+    name += objtype;
+    name += ":";
+    name += objname;
+    __itt_string_handle* pName = UNICODE_AGNOSTIC(string_handle_create)(name.c_str());
+    __itt_id id = __itt_id_make(addr, 0);
+
+    CTraceEventFormat::SRegularFields rf = GetRegularFields();
+    WriteRecord(ERecordType::ObjectNew, SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null, pName});
+}
+
+#ifdef _WIN32
+void sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute) {
+    UNICODE_AGNOSTIC(sync_create)(addr, W2L(objtype).c_str(), W2L(objname).c_str(), attribute);
+}
+#endif
+
+void sync_destroy(void *addr) {
+    ITT_FUNCTION_STAT();
+
+    __itt_id id = __itt_id_make(addr, 0);
+    CTraceEventFormat::SRegularFields rf = GetRegularFields();
+    WriteRecord(ERecordType::ObjectDelete, SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null});
+}
+
+inline void SyncState(void * addr, const char * state) {
+    ITT_FUNCTION_STAT();
+
+    __itt_id id = __itt_id_make(addr, 0);
+
+    CTraceEventFormat::SRegularFields rf = GetRegularFields();
+    WriteRecord(ERecordType::ObjectSnapshot, SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null, nullptr, nullptr, state, strlen(state)});
+}
+
+void UNICODE_AGNOSTIC(sync_rename)(void * addr, const char * name) {
+    ITT_FUNCTION_STAT();
+
+    SyncState(addr, (std::string("name=") + name).c_str());
+}
+#ifdef _WIN32
+void sync_renameW(void * addr, const wchar_t * name) {
+    UNICODE_AGNOSTIC(sync_rename)(addr, W2L(name).c_str());
+}
+#endif
+
+void sync_prepare(void *addr) {
+    ITT_FUNCTION_STAT();
+
+    SyncState(addr, "state=prepare");
+}
+
+void sync_cancel(void *addr) {
+    ITT_FUNCTION_STAT();
+
+    SyncState(addr, "state=cancel");
+}
+
+void sync_acquired(void *addr) {
+    ITT_FUNCTION_STAT();
+    SyncState(addr, "state=acquired");
+}
+
+void sync_releasing(void *addr) {
+    ITT_FUNCTION_STAT();
+    SyncState(addr, "state=releasing");
+}
+
+//region is the same as frame, only explicitly named
+void region_begin(const __itt_domain *pDomain, __itt_id id, __itt_id parentid, const __itt_string_handle *pName) {
+    ITT_FUNCTION_STAT();
+
+    CTraceEventFormat::SRegularFields rf = GetRegularFields();
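+    //the region is emitted as a BeginFrame record carrying the explicit name; region_end emits the matching EndFrame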
WriteRecord(ERecordType::BeginFrame, SRecord{rf, *pDomain, id, parentid, pName}); +} + +void region_end(const __itt_domain *pDomain, __itt_id id) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, id, __itt_null}); +} + +__itt_clock_domain* clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data) { + ITT_FUNCTION_STAT(); + CIttLocker lock; + __itt_domain* pDomain = g_pIntelSEAPIDomain; + DomainExtra* pDomainExtra = (DomainExtra*)pDomain->extra2; // NOLINT + __itt_clock_domain** ppClockDomain = &pDomainExtra->pClockDomain; + while (*ppClockDomain && (*ppClockDomain)->next) { + ppClockDomain = &(*ppClockDomain)->next; + } + + __itt_clock_info ci = {}; + uint64_t now1 = CTraceEventFormat::GetRegularFields().nanoseconds; + fn(&ci, fn_data); + uint64_t now2 = CTraceEventFormat::GetRegularFields().nanoseconds; + + *ppClockDomain = new __itt_clock_domain{ + ci, fn, fn_data, 0, + new uint64_t((now1 + now2) / 2) //let's keep current time point in extra2 + }; + + return *ppClockDomain; +} + +void clock_domain_reset() { + ITT_FUNCTION_STAT(); + + TraverseDomains([](__itt_domain& domain){ + DomainExtra* pDomainExtra = (DomainExtra*)domain.extra2; // NOLINT + if (!pDomainExtra) return; + __itt_clock_domain* pClockDomain = pDomainExtra->pClockDomain; + while (pClockDomain) { + uint64_t now1 = CTraceEventFormat::GetRegularFields().nanoseconds; + pClockDomain->fn(&pClockDomain->info, pClockDomain->fn_data); + uint64_t now2 = CTraceEventFormat::GetRegularFields().nanoseconds; + *(uint64_t*)pClockDomain->extra2 = (now1 + now2) / 2; // NOLINT + pClockDomain = pClockDomain->next; + } + }); +} + +void task_begin_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id taskid, + __itt_id parentid, + __itt_string_handle* pName) { + ITT_FUNCTION_STAT(); + + SThreadRecord* pThreadRecord = GetThreadRecord(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); + + pThreadRecord->pTask = placement_new(STaskDescriptor) { + pThreadRecord->pTask, //chaining the previous task inside + rf, + pDomain, pName, + taskid, parentid + }; // NOLINT + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); + } +} + +void task_end_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); + + SThreadRecord* pThreadRecord = GetThreadRecord(); + if (!pThreadRecord->pTask) { + VerbosePrint("Uneven begin/end count for domain: %s\n", pDomain->nameA); + return; + } + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskEnd(*pThreadRecord->pTask, rf, false); + } + STaskDescriptor* prev = pThreadRecord->pTask->prev; + placement_free(pThreadRecord->pTask); + pThreadRecord->pTask = prev; +} + +void id_create(const __itt_domain *pDomain, __itt_id id) { + ITT_FUNCTION_STAT(); + //noting to do here yet +} + +void id_destroy(const __itt_domain *pDomain, __itt_id id) { + ITT_FUNCTION_STAT(); + //noting to do here yet +} + +void set_track(__itt_track* track) { + ITT_FUNCTION_STAT(); + GetThreadRecord()->pTrack = track; +} + +int64_t g_lastPseudoThread = -1; +int64_t g_lastPseudoProcess = -1; + + +__itt_track_group* track_group_create(__itt_string_handle* pName, __itt_track_group_type track_group_type) { + 
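+    //a track group is reported as a pseudo-process: named groups get decreasing negative ids, a null name stands for the current process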
ITT_FUNCTION_STAT(); + CIttLocker lock; + __itt_domain* pDomain = g_pIntelSEAPIDomain; + DomainExtra* pDomainExtra = (DomainExtra*)pDomain->extra2; // NOLINT + __itt_track_group** ppTrackGroup = &pDomainExtra->pTrackGroup; + while (*ppTrackGroup && (*ppTrackGroup)->next) { + if ((*ppTrackGroup)->name == pName) + return *ppTrackGroup; + ppTrackGroup = &(*ppTrackGroup)->next; + } + if (pName) { + WriteGroupName(g_lastPseudoProcess, pName->strA); + } + //zero name means current process + return *ppTrackGroup = new __itt_track_group{ pName, nullptr, track_group_type, int(pName ? g_lastPseudoProcess-- : g_PID) }; // NOLINT +} + +__itt_track* track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type) { + ITT_FUNCTION_STAT(); + CIttLocker locker; + + if (!track_group) { + track_group = track_group_create(nullptr, __itt_track_group_type_normal); + } + + __itt_track** ppTrack = &track_group->track; + while (*ppTrack && (*ppTrack)->next) { + if ((*ppTrack)->name == name) + return *ppTrack; + ppTrack = &(*ppTrack)->next; + } + + CTraceEventFormat::SRegularFields* pRF = new CTraceEventFormat::SRegularFields{int64_t(track_group->extra1), g_lastPseudoThread--}; + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->SetThreadName(*pRF, name->strA); + } + + return *ppTrack = new __itt_track{name, track_group, track_type, 0, pRF}; +} + +class COverlapped { +public: + static COverlapped& Get() { + SThreadRecord* pThreadRecord = GetThreadRecord(); + if (pThreadRecord->pOverlapped) + return *pThreadRecord->pOverlapped; + return *(pThreadRecord->pOverlapped = new COverlapped); + } + + void Begin(__itt_id taskid, const CTraceEventFormat::SRegularFields& rf, const __itt_domain* domain, __itt_string_handle* name, __itt_id parentid) { + m_map[taskid].reset(placement_new(STaskDescriptor){ + nullptr, //chaining the previous task inside + rf, + domain, name, + taskid, parentid + }, placement_free); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskBegin(*m_map[taskid], true); + } + } + + bool AddArg(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length) { + TTaskMap::iterator it = m_map.find(id); + if (m_map.end() == it) + return false; + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->AddArg(*m_map[id], key, data, length); + } + return true; + } + + bool AddArg(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, double value) { + TTaskMap::iterator it = m_map.find(id); + if (m_map.end() == it) + return false; + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->AddArg(*m_map[id], key, value); + } + return true; + } + + void End(__itt_id taskid, const CTraceEventFormat::SRegularFields& rf, const __itt_domain* domain) { + TTaskMap::iterator it = m_map.find(taskid); + if (m_map.end() == it) return; + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskEnd(*m_map[taskid], rf, true); + } + m_map.erase(it); + } + + static void FinishAll() { + TraverseThreadRecords([](SThreadRecord& record){ + if (record.pOverlapped) + record.pOverlapped->Finish(); + }); + } + +protected: + void Finish() { + CTraceEventFormat::SRegularFields rf = CTraceEventFormat::GetRegularFields(); + for (const auto& pair : m_map) { + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->TaskEnd(*pair.second, rf, true); + } + } + m_map.clear(); + } + + typedef 
std::map<__itt_id, std::shared_ptr> TTaskMap; + TTaskMap m_map; +}; + +void task_begin_overlapped_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id taskid, + __itt_id parentid, + __itt_string_handle* pName) { + ITT_FUNCTION_STAT(); + + COverlapped::Get().Begin(taskid, GetRegularFields(clock_domain, timestamp), pDomain, pName, parentid); +} + +void task_begin_overlapped(const __itt_domain* pDomain, __itt_id taskid, __itt_id parentid, __itt_string_handle* pName) { + ITT_FUNCTION_STAT(); + + task_begin_overlapped_ex(pDomain, nullptr, 0, taskid, parentid, pName); +} + +void task_end_overlapped_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid) { + ITT_FUNCTION_STAT(); + + COverlapped::Get().End(taskid, GetRegularFields(clock_domain, timestamp), pDomain); +} + +void task_end_overlapped(const __itt_domain *pDomain, __itt_id taskid) { + ITT_FUNCTION_STAT(); + + task_end_overlapped_ex(pDomain, nullptr, 0, taskid); +} + +std::map<__itt_id, __itt_string_handle*> g_namedIds; + +void SetIdName(const __itt_id& id, const char *data) { + CIttLocker lock; + g_namedIds[id] = UNICODE_AGNOSTIC(string_handle_create)(data); +} + +template +void MetadataAdd(const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pKey, Args ... args) { + if (id.d1 || id.d2) { + SThreadRecord* pThreadRecord = GetThreadRecord(); + if (!COverlapped::Get().AddArg(pDomain, id, pKey, args...) && pThreadRecord->pTask && pThreadRecord->pTask->id == id) { + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->AddArg(*pThreadRecord->pTask, pKey, args...); + } + } + } +} + +void UNICODE_AGNOSTIC(metadata_str_add)(const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pKey, const char *data, size_t length) { + ITT_FUNCTION_STAT(); + + if (id == __itt_null) { + if (0 == strcmp(pKey->strA, "__sea_cut")) { + marker(pDomain, id, pKey, __itt_marker_scope_process); + SetCutName(data); + return; + } + if (0 == strcmp(pKey->strA, "__sea_set_folder")) { + SetFolder(data); + return; + } + if (0 == strcmp(pKey->strA, "__sea_set_ring")) { + SetRing(1000000000ull * atoi(data)); + return; + } + if (0 == strcmp(pKey->strA, "__sea_ftrace_sync")) { +#ifdef __linux__ + WriteFTraceTimeSyncMarkers(); +#endif + return; + } + } + if (!length) + length = data ? 
strlen(data) : 0; + if (!pKey) + SetIdName(id, data); + else + MetadataAdd(pDomain, id, pKey, data, length); +} + +#ifdef _WIN32 +void metadata_str_addW(const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pKey, const wchar_t *data, size_t length) { + UNICODE_AGNOSTIC(metadata_str_add)(pDomain, id, pKey, W2L(data).c_str(), length); +} +#endif + +void metadata_add(const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pKey, __itt_metadata_type type, size_t count, void *data) { + ITT_FUNCTION_STAT(); + + if (id.d1 || id.d2) { + if (data) { + if (__itt_metadata_unknown != type) { + double res = g_MetatypeFormatConverter[type](data); + MetadataAdd(pDomain, id, pKey, res); + } else { + if (count) + MetadataAdd(pDomain, id, pKey, (const char*)data, count); //raw data with size // NOLINT + else + MetadataAdd(pDomain, id, pKey, (double)(uint64_t)data); //just pointer, convert it to number // NOLINT + } + } + } else { + if (__itt_metadata_unknown == type) + return; + Counter(pDomain, pKey, g_MetatypeFormatConverter[type](data)); + } +} + +const char* api_version(void) { + ITT_FUNCTION_STAT(); + return "IntelSEAPI"; +} + +void frame_begin_v3(const __itt_domain *pDomain, __itt_id *id) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + WriteRecord(ERecordType::BeginFrame, SRecord{rf, *pDomain, id ? *id : __itt_null, __itt_null}); +} + +void frame_end_v3(const __itt_domain *pDomain, __itt_id *id) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, id ? *id : __itt_null, __itt_null}); +} + +__itt_frame_t* UNICODE_AGNOSTIC(frame_create)(const char *domain) { + ITT_FUNCTION_STAT(); + return new __itt_frame_t{ + UNICODE_AGNOSTIC(domain_create)(domain), + __itt_id_make(const_cast(domain), 0) + }; +} + +#ifdef _WIN32 +__itt_frame_t* frame_createW(const wchar_t* domain) { + return UNICODE_AGNOSTIC(frame_create)(W2L(domain).c_str()); +} +#endif + +void frame_begin(__itt_frame_t* frame) { + ITT_FUNCTION_STAT(); + frame_begin_v3(frame->pDomain, &frame->id); +} + +void frame_end(__itt_frame_t* frame) { + ITT_FUNCTION_STAT(); + frame_end_v3(frame->pDomain, &frame->id); +} + +void frame_submit_v3(const __itt_domain *pDomain, __itt_id *pId, __itt_timestamp begin, __itt_timestamp end) { + ITT_FUNCTION_STAT(); + + CTraceEventFormat::SRegularFields rf = GetRegularFields(); + if (__itt_timestamp_none == end) + end = rf.nanoseconds; + const __itt_string_handle *pName = nullptr; + if (pId) { + if (pId->d3) { + pName = reinterpret_cast<__itt_string_handle *>(pId->d3); + } else { + CIttLocker lock; + auto it = g_namedIds.find(*pId); + if (g_namedIds.end() != it) { + pName = it->second; + pId->d3 = (unsigned long long)pName; + } + } + } + rf.nanoseconds = begin; + WriteRecord(ERecordType::BeginFrame, SRecord{ rf, *pDomain, pId ? *pId : __itt_null, __itt_null, pName }); + rf.nanoseconds = end; + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, pId ? 
*pId : __itt_null, __itt_null}); +} + +__itt_timestamp get_timestamp() { + ITT_FUNCTION_STAT(); + return GetRegularFields().nanoseconds; +} + +void Pause() { + static __itt_global* pGlobal = GetITTGlobal(); + while (pGlobal) { + pGlobal->state = __itt_collection_paused; + ___itt_domain* pDomain = pGlobal->domain_list; + while (pDomain) { + pDomain->flags = 0; //this flag is analyzed by static part of ITT to decide where to call dynamic part or not + pDomain = pDomain->next; + } + pGlobal = pGlobal->next; + } +} + +void pause() { + ITT_FUNCTION_STAT(); + static __itt_string_handle* pPause = UNICODE_AGNOSTIC(string_handle_create)("PAUSE"); + static __itt_global* pGlobal = GetITTGlobal(); + static __itt_id id = __itt_id_make(pGlobal, 0); + region_begin(pGlobal->domain_list, id, __itt_null, pPause); + Pause(); +} + +void Resume() { + static __itt_global* pGlobal = GetITTGlobal(); + + while (pGlobal) { + ___itt_domain* pDomain = pGlobal->domain_list; + while (pDomain) { + pDomain->flags = 1; //this flag is analyzed by static part of ITT to decide where to call dynamic part or not + pDomain = pDomain->next; + } + pGlobal->state = __itt_collection_normal; + pGlobal = pGlobal->next; + } +} + +void resume() { + ITT_FUNCTION_STAT(); + static __itt_global* pGlobal = GetITTGlobal(); + static __itt_id id = __itt_id_make(pGlobal, 0); + region_end(pGlobal->domain_list, id); + Resume(); +} + +using TRelations = __itt_string_handle* [__itt_relation_is_predecessor_to + 1]; +//it's not static member of function to avoid racing +TRelations g_relations = {}; //will be filled in InitSEA + +void relation_add_ex(const __itt_domain *pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id head, + __itt_relation relation, + __itt_id tail) { + ITT_FUNCTION_STAT(); + CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->AddRelation(rf, pDomain, head, g_relations[relation], tail); + } +} + +void relation_add_to_current(const __itt_domain *pDomain, __itt_relation relation, __itt_id tail) { + ITT_FUNCTION_STAT(); + relation_add_ex(pDomain, nullptr, 0, __itt_null, relation, tail); +} + +void relation_add(const __itt_domain *pDomain, __itt_id head, __itt_relation relation, __itt_id tail) { + ITT_FUNCTION_STAT(); + relation_add_ex(pDomain, nullptr, 0, head, relation, tail); +} + +void relation_add_to_current_ex(const __itt_domain *pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_relation relation, + __itt_id tail) { + ITT_FUNCTION_STAT(); + relation_add_ex(pDomain, clock_domain, timestamp, __itt_null, relation, tail); +} + +struct SHeapFunction { + __itt_domain* pDomain; + std::string name; + ___itt_string_handle* pName; +}; + +__itt_heap_function ITTAPI UNICODE_AGNOSTIC(heap_function_create)(const char* name, const char* domain) { + ITT_FUNCTION_STAT(); + std::string counter_name = std::string(name) + ":ALL(bytes)"; + return new SHeapFunction { + UNICODE_AGNOSTIC(domain_create)(domain), + name, + UNICODE_AGNOSTIC(string_handle_create)(counter_name.c_str()) + }; +} + +#ifdef _WIN32 +__itt_heap_function ITTAPI heap_function_createW(const wchar_t* name, const wchar_t* domain) { + return UNICODE_AGNOSTIC(heap_function_create)(W2L(name).c_str(), W2L(domain).c_str()); +} +#endif + +class CMemoryTracker { +protected: + TCritSec m_cs; + + typedef std::pair TDomainString; + + struct SNode { + struct SMemory { + int32_t current_amount = 0; + int32_t 
max_amount = 0;
+        };
+        std::map<size_t, SMemory> memory;
+        std::map<TDomainString, SNode> chilren;
+    };
+    SNode m_tree;
+
+    std::map<const void*, std::pair<size_t, SNode*>> m_size_map;
+    typedef std::pair<__itt_string_handle*, size_t/*count*/> TBlockData;
+    std::map<size_t, TBlockData> m_counter_map;
+    bool m_bInitialized = false;
+
+public:
+    CMemoryTracker()
+        : m_bInitialized(true) {}
+    void Alloc(SHeapFunction* pHeapFunction, const void* addr, size_t size) {
+        static bool bMemCount = !!(GetFeatureSet() & sfMemCounters);
+
+        if (!m_bInitialized) return;
+
+        for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) {
+            g_handlers[i]->Alloc(GetRegularFields(), addr, size, pHeapFunction->pDomain->nameA, pHeapFunction->name.c_str());
+        }
+
+        SNode* pNode = UpdateAllocation(size, +1, nullptr);
+        TBlockData block;
+        {
+            std::lock_guard<TCritSec> lock(m_cs);
+            m_size_map[addr] = std::make_pair(size, pNode);
+            if (bMemCount) {
+                auto it = m_counter_map.find(size);
+                if (m_counter_map.end() == it) {
+                    std::string name = pHeapFunction->name + std::string(":size<") + std::to_string(size) + ">(count)";
+                    __itt_string_handle* pName = UNICODE_AGNOSTIC(string_handle_create)(name.c_str());
+                    it = m_counter_map.insert(m_counter_map.end(), std::make_pair(size, std::make_pair(pName, size_t(1))));
+                } else {
+                    ++it->second.second;
+                }
+                block = it->second;
+            }
+        }
+        if (bMemCount) {
+            Counter(pHeapFunction->pDomain, block.first, double(block.second)); //report current count for this size // NOLINT
+        }
+    }
+
+    SNode* UpdateAllocation(size_t size, int32_t delta, SNode* pNode) {
+        static bool bMemStat = (GetFeatureSet() & sfMemStat) && InitMemStat();
+        if (!bMemStat)
+            return nullptr;
+        SThreadRecord* pThreadRecord = GetThreadRecord();
+        STaskDescriptor* pTask = pThreadRecord->pTask;
+        std::stack<TDomainString> stack;
+        if (!pNode) {
+            for (; pTask; pTask = pTask->prev) {
+                stack.push(TDomainString(pTask->pDomain, pTask->pName ? pTask->pName : pTask->fn));
+            }
+        }
+        std::lock_guard<TCritSec> lock(m_cs);
+        if (!pNode) {
+            pNode = &m_tree;
+            while (!stack.empty()) {
+                pNode = &m_tree.chilren[stack.top()];
+                stack.pop();
+            }
+        }
+        SNode::SMemory & mem = pNode->memory[size];
+        mem.current_amount += delta;
+        if (mem.current_amount > mem.max_amount)
+            mem.max_amount = mem.current_amount;
+        return pNode;
+    }
+
+    void Free(SHeapFunction* pHeapFunction, const void* addr) {
+        static bool bMemCount = !!(GetFeatureSet() & sfMemCounters);
+        size_t size = 0;
+        if (m_bInitialized) {
+            std::lock_guard<TCritSec> lock(m_cs);
+
+            const auto& pair = m_size_map[addr];
+            size = pair.first;
+            SNode* pNode = pair.second;
+            m_size_map.erase(addr);
+            if (bMemCount) {
+                auto it = m_counter_map.find(size);
+                if (m_counter_map.end() == it)
+                    return; //how come?
+                else
+                    --it->second.second;
+                Counter(pHeapFunction->pDomain, it->second.first, double(it->second.second)); // NOLINT
+            }
+            if (pNode) //if we missed the allocation, we don't care about freeing
+                UpdateAllocation(size, -1, pNode);
+        }
+        for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) {
+            g_handlers[i]->Free(GetRegularFields(), addr, size, pHeapFunction->pDomain->nameA, pHeapFunction->name.c_str());
+        }
+    }
+
+    void SaveMemoryStatistics() {
+        if (!(GetFeatureSet() & sfMemStat))
+            return;
+        std::lock_guard<TCritSec> lock(m_cs);
+        WriteNode(m_tree);
+    }
+
+    template<class T>
+    void WriteMem(T value) {
+        WriteMemStat(&value, sizeof(T));
+    }
+
+    void WriteNode(const SNode& node) {
+        WriteMem((uint32_t)node.memory.size());
+        for (const auto& pair : node.memory) {
+            WriteMem((uint32_t)pair.first); //size
+            WriteMem(pair.second.current_amount); //SNode::SMemory
+            WriteMem((uint32_t)pair.second.max_amount); //SNode::SMemory
+        }
+        WriteMem((uint32_t)node.chilren.size());
+        for (const auto& pair : node.chilren) {
+            const TDomainString& domain_string = pair.first;
+            WriteMem((const void*)domain_string.first); //domain
+            WriteMem((const void*)domain_string.second); //string
+            WriteNode(pair.second);
+        }
+    }
+
+    ~CMemoryTracker() {
+        m_bInitialized = false;
+        SaveMemoryStatistics();
+    }
+} g_oMemoryTracker;
+
+void heap_allocate_begin(__itt_heap_function h, size_t size, int initialized) {
+    ITT_FUNCTION_STAT();
+}
+
+void heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int) {
+    ITT_FUNCTION_STAT();
+    g_oMemoryTracker.Alloc(reinterpret_cast<SHeapFunction*>(h), *addr, size);
+}
+
+void heap_free_begin(__itt_heap_function h, void* addr) {
+    ITT_FUNCTION_STAT();
+    g_oMemoryTracker.Free(reinterpret_cast<SHeapFunction*>(h), addr);
+}
+
+void heap_free_end(__itt_heap_function h, void* addr) {
+    ITT_FUNCTION_STAT();
+}
+
+__itt_domain* get_events_domain() {
+    static __itt_domain* s_pEvents = UNICODE_AGNOSTIC(domain_create)("sea_events");
+    return s_pEvents;
+}
+
+__itt_event UNICODE_AGNOSTIC(event_create)(const char *name, int namelen) {
+    ITT_FUNCTION_STAT();
+    __itt_domain* pEvents = get_events_domain();
+    __itt_string_handle* pStr = UNICODE_AGNOSTIC(string_handle_create)(name);
+    return intptr_t(pStr) - intptr_t(pEvents);
+}
+
+int event_start(__itt_event event) {
+    ITT_FUNCTION_STAT();
+    __itt_domain* pEvents = get_events_domain();
+    __itt_string_handle* pStr = reinterpret_cast<__itt_string_handle*>(intptr_t(pEvents) + event);
+    task_begin_overlapped(pEvents, __itt_id_make(pEvents, (unsigned long long)pStr), __itt_null, pStr);
+    return event;
+}
+
+int event_end(__itt_event event) {
+    ITT_FUNCTION_STAT();
+    __itt_domain* pEvents = get_events_domain();
+    __itt_string_handle* pStr = reinterpret_cast<__itt_string_handle*>(intptr_t(pEvents) + event);
+    task_end_overlapped(pEvents, __itt_id_make(pEvents, (unsigned long long)pStr));
+    return event;
+}
+
+#ifdef _WIN32
+__itt_event ITTAPI event_createW(const wchar_t *name, int namelen) {
+    return UNICODE_AGNOSTIC(event_create)(W2L(name).c_str(), namelen);
+}
+#endif
+
+#ifdef _WIN32
+    #define WIN(something) something
+#else
+    #define WIN(nothing)
+#endif
+
+#define _AW(macro, name) macro(UNICODE_AGNOSTIC(name)) WIN(macro(ITT_JOIN(name, W)))
+
+#define ORIGINAL_FUNCTIONS()\
+    ITT_STUB_IMPL_ORIG(UNICODE_AGNOSTIC(domain_create))\
+WIN(ITT_STUB_IMPL_ORIG(domain_createW))\
+    ITT_STUB_IMPL_ORIG(UNICODE_AGNOSTIC(string_handle_create))\
+WIN(ITT_STUB_IMPL_ORIG(string_handle_createW))
+
+#define API_MAP()\
+_AW(ITT_STUB_IMPL, thread_set_name)\
+    ITT_STUB_IMPL(task_begin)\
ITT_STUB_IMPL(task_begin_fn)\ + ITT_STUB_IMPL(task_end)\ +_AW(ITT_STUB_IMPL, metadata_str_add)\ + ITT_STUB_IMPL(marker)\ + ITT_STUB_IMPL(marker_ex)\ + ITT_STUB_IMPL(counter_inc_delta_v3)\ +_AW(ITT_STUB_IMPL, counter_create)\ +_AW(ITT_STUB_IMPL, counter_create_typed)\ + ITT_STUB_IMPL(counter_set_value)\ + ITT_STUB_IMPL(counter_set_value_ex)\ + ITT_STUB_IMPL(clock_domain_create)\ + ITT_STUB_IMPL(clock_domain_reset)\ + ITT_STUB_IMPL(task_begin_ex)\ + ITT_STUB_IMPL(task_end_ex)\ + ITT_STUB_IMPL(id_create)\ + ITT_STUB_IMPL(set_track)\ + ITT_STUB_IMPL(track_create)\ + ITT_STUB_IMPL(track_group_create)\ + ITT_STUB_IMPL(task_begin_overlapped)\ + ITT_STUB_IMPL(task_begin_overlapped_ex)\ + ITT_STUB_IMPL(task_end_overlapped)\ + ITT_STUB_IMPL(task_end_overlapped_ex)\ + ITT_STUB_IMPL(id_destroy)\ + ITT_STUB_IMPL(api_version)\ + ITT_STUB_IMPL(frame_begin_v3)\ + ITT_STUB_IMPL(frame_end_v3)\ + ITT_STUB_IMPL(frame_submit_v3)\ +_AW(ITT_STUB_IMPL, frame_create)\ + ITT_STUB_IMPL(frame_begin)\ + ITT_STUB_IMPL(frame_end)\ + ITT_STUB_IMPL(region_begin)\ + ITT_STUB_IMPL(region_end)\ + ITT_STUB_IMPL(pause)\ + ITT_STUB_IMPL(resume)\ + ITT_STUB_IMPL(get_timestamp)\ + ITT_STUB_IMPL(metadata_add)\ +_AW(ITT_STUB_IMPL, sync_create)\ + ITT_STUB_IMPL(sync_destroy)\ + ITT_STUB_IMPL(sync_acquired)\ + ITT_STUB_IMPL(sync_releasing)\ +_AW(ITT_STUB_IMPL, sync_rename)\ + ITT_STUB_IMPL(sync_prepare)\ + ITT_STUB_IMPL(sync_cancel)\ + ITT_STUB_IMPL(relation_add_to_current)\ + ITT_STUB_IMPL(relation_add)\ + ITT_STUB_IMPL(relation_add_to_current_ex)\ + ITT_STUB_IMPL(relation_add_ex)\ +_AW(ITT_STUB_IMPL, heap_function_create)\ + ITT_STUB_IMPL(heap_allocate_begin)\ + ITT_STUB_IMPL(heap_allocate_end)\ + ITT_STUB_IMPL(heap_free_begin)\ + ITT_STUB_IMPL(heap_free_end)\ +_AW(ITT_STUB_IMPL, event_create)\ +WIN(_AW(ITT_STUB_IMPL, event_create))\ + ITT_STUB_IMPL(event_start)\ + ITT_STUB_IMPL(event_end)\ + ORIGINAL_FUNCTIONS()\ + ITT_STUB_NO_IMPL(thread_ignore)\ +_AW(ITT_STUB_NO_IMPL, thr_name_set)\ + ITT_STUB_NO_IMPL(thr_ignore)\ + ITT_STUB_NO_IMPL(counter_inc_delta)\ + ITT_STUB_NO_IMPL(enable_attach)\ + ITT_STUB_NO_IMPL(suppress_push)\ + ITT_STUB_NO_IMPL(suppress_pop)\ + ITT_STUB_NO_IMPL(suppress_mark_range)\ + ITT_STUB_NO_IMPL(suppress_clear_range)\ + ITT_STUB_NO_IMPL(model_site_beginA)\ +WIN(ITT_STUB_NO_IMPL(model_site_beginW))\ + ITT_STUB_NO_IMPL(model_site_beginAL)\ + ITT_STUB_NO_IMPL(model_site_end)\ +_AW(ITT_STUB_NO_IMPL, model_task_begin)\ + ITT_STUB_NO_IMPL(model_task_end)\ + ITT_STUB_NO_IMPL(model_lock_acquire)\ + ITT_STUB_NO_IMPL(model_lock_release)\ + ITT_STUB_NO_IMPL(model_record_allocation)\ + ITT_STUB_NO_IMPL(model_record_deallocation)\ + ITT_STUB_NO_IMPL(model_induction_uses)\ + ITT_STUB_NO_IMPL(model_reduction_uses)\ + ITT_STUB_NO_IMPL(model_observe_uses)\ + ITT_STUB_NO_IMPL(model_clear_uses)\ + ITT_STUB_NO_IMPL(model_site_begin)\ + ITT_STUB_NO_IMPL(model_site_beginA)\ +WIN(ITT_STUB_NO_IMPL(model_site_beginW))\ + ITT_STUB_NO_IMPL(model_site_beginAL)\ + ITT_STUB_NO_IMPL(model_task_begin)\ + ITT_STUB_NO_IMPL(model_task_beginA)\ +WIN(ITT_STUB_NO_IMPL(model_task_beginW))\ + ITT_STUB_NO_IMPL(model_task_beginAL)\ + ITT_STUB_NO_IMPL(model_iteration_taskA)\ +WIN(ITT_STUB_NO_IMPL(model_iteration_taskW))\ + ITT_STUB_NO_IMPL(model_iteration_taskAL)\ + ITT_STUB_NO_IMPL(model_site_end_2)\ + ITT_STUB_NO_IMPL(model_task_end_2)\ + ITT_STUB_NO_IMPL(model_lock_acquire_2)\ + ITT_STUB_NO_IMPL(model_lock_release_2)\ + ITT_STUB_NO_IMPL(model_aggregate_task)\ + ITT_STUB_NO_IMPL(model_disable_push)\ + ITT_STUB_NO_IMPL(model_disable_pop)\ + 
ITT_STUB_NO_IMPL(heap_reallocate_begin)\ + ITT_STUB_NO_IMPL(heap_reallocate_end)\ + ITT_STUB_NO_IMPL(heap_internal_access_begin)\ + ITT_STUB_NO_IMPL(heap_internal_access_end)\ + ITT_STUB_NO_IMPL(heap_record_memory_growth_begin)\ + ITT_STUB_NO_IMPL(heap_record_memory_growth_end)\ + ITT_STUB_NO_IMPL(heap_reset_detection)\ + ITT_STUB_NO_IMPL(heap_record)\ + ITT_STUB_NO_IMPL(task_group)\ + ITT_STUB_NO_IMPL(counter_inc_v3)\ +_AW(ITT_STUB_NO_IMPL, sync_set_name)\ +_AW(ITT_STUB_NO_IMPL, notify_sync_name)\ + ITT_STUB_NO_IMPL(notify_sync_prepare)\ + ITT_STUB_NO_IMPL(notify_sync_cancel)\ + ITT_STUB_NO_IMPL(notify_sync_acquired)\ + ITT_STUB_NO_IMPL(notify_sync_releasing)\ + ITT_STUB_NO_IMPL(memory_read)\ + ITT_STUB_NO_IMPL(memory_write)\ + ITT_STUB_NO_IMPL(memory_update)\ + ITT_STUB_NO_IMPL(state_get)\ + ITT_STUB_NO_IMPL(state_set)\ + ITT_STUB_NO_IMPL(obj_mode_set)\ + ITT_STUB_NO_IMPL(thr_mode_set)\ + ITT_STUB_NO_IMPL(counter_destroy)\ + ITT_STUB_NO_IMPL(counter_inc)\ + ITT_STUB_NO_IMPL(counter_inc_v3)\ +_AW(ITT_STUB_NO_IMPL, mark_create)\ +_AW(ITT_STUB_NO_IMPL, mark)\ + ITT_STUB_NO_IMPL(mark_off)\ +_AW(ITT_STUB_NO_IMPL, mark_global)\ + ITT_STUB_NO_IMPL(mark_global_off)\ + ITT_STUB_NO_IMPL(stack_caller_create)\ + ITT_STUB_NO_IMPL(stack_caller_destroy)\ + ITT_STUB_NO_IMPL(stack_callee_enter)\ + ITT_STUB_NO_IMPL(stack_callee_leave)\ + ITT_STUB_NO_IMPL(id_create_ex)\ + ITT_STUB_NO_IMPL(id_destroy_ex)\ + ITT_STUB_NO_IMPL(task_begin_fn_ex)\ + ITT_STUB_NO_IMPL(metadata_add_with_scope)\ +_AW(ITT_STUB_NO_IMPL, metadata_str_add_with_scope)\ +_AW(ITT_STUB_NO_IMPL, av_save) + +void FillApiList(__itt_api_info* api_list_ptr) { +#define ITT_STUB_IMPL(fn) if (0 == strcmp("__itt_" ITT_TO_STR(fn), api_list_ptr[i].name)) {*api_list_ptr[i].func_ptr = (void*)sea::fn; continue;} // NOLINT +#define ITT_STUB_IMPL_ORIG(name) ITT_STUB_IMPL(name) +#ifdef _DEBUG //dangerous stub that doesn't return anything (even when expected) but records the function call for statistics sake + #define ITT_STUB_NO_IMPL(fn) if (0 == strcmp("__itt_" ITT_TO_STR(fn), api_list_ptr[i].name)) { \ + struct local{ \ + static void stub(...) { CIttFnStat oIttFnStat("NO IMPL:\t" ITT_TO_STR(fn)); } \ + }; \ + *api_list_ptr[i].func_ptr = reinterpret_cast(local::stub); \ + continue; \ + } +#else + #define ITT_STUB_NO_IMPL(fn) +#endif + + for (int i = 0; (api_list_ptr[i].name != NULL) && (*api_list_ptr[i].name != 0); ++i) { + API_MAP(); //continue is called inside when function is found + VerbosePrint("Not bound: %s\n", api_list_ptr[i].name); + } +#undef ITT_STUB_IMPL +#undef ITT_STUB_IMPL_ORIG +#undef ITT_STUB_NO_IMPL +} + +uint64_t GetFeatureSet() { + static std::string env = get_environ_value("INTEL_SEA_FEATURES"); + static std::string save = GetSavePath(); + + static uint64_t features = + (std::string::npos != env.find("mfp") ? sfMetricsFrameworkPublisher : 0) + | + (std::string::npos != env.find("mfc") ? sfMetricsFrameworkConsumer : 0) + | + (save.size() ? sfSEA : 0) + | + (std::string::npos != env.find("stack") ? sfStack : 0) + | + (std::string::npos != env.find("vscv") ? sfConcurrencyVisualizer : 0) + | + (std::string::npos != env.find("rmtr") ? sfRemotery : 0) + | + (std::string::npos != env.find("brflr") ? sfBrofiler : 0) + | + (std::string::npos != env.find("memstat") ? sfMemStat : 0) + | + (std::string::npos != env.find("memcount") ? sfMemCounters : 0) + | + (std::string::npos != env.find("rad") ? 
sfRadTelemetry : 0);
+    return features;
+}
+
+void TraverseDomains(const std::function<void(___itt_domain&)>& callback) {
+    __itt_global* pGlobal = GetITTGlobal();
+    for (___itt_domain* pDomain = pGlobal->domain_list; pDomain; pDomain = pDomain->next) {
+        callback(*pDomain);
+    }
+}
+
+void TraverseThreadRecords(const std::function<void(SThreadRecord&)>& callback) {
+    TraverseDomains(
+        [&](___itt_domain& domain){
+            if (DomainExtra* pDomainExtra = reinterpret_cast<DomainExtra*>(domain.extra2)) {
+                for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; pThreadRecord = pThreadRecord->pNext)
+                    callback(*pThreadRecord);
+            }
+        });
+}
+
+void SetCutName(const std::string& name) {
+    CIttLocker lock;
+    g_spCutName = std::make_shared<std::string>(Escape4Path(name));
+    TraverseThreadRecords([](SThreadRecord& record){
+        record.nSpeedupCounter = (std::numeric_limits<int>::max)(); //changing the number is safer than changing the pointer to the last recorder
+    });
+}
+
+//in global scope variables are initialized from the main thread
+//that's the simplest way to get the tid of the main thread
+CTraceEventFormat::SRegularFields g_rfMainThread = CTraceEventFormat::GetRegularFields();
+
+void SetFolder(const std::string& path) {
+    CIttLocker lock;
+
+    std::string new_path = path.size() ? (path + "-" + std::to_string(CTraceEventFormat::GetRegularFields().pid) + "/") : "";
+
+    if (g_savepath == new_path)
+        return;
+
+    //To move into a new folder we must make sure of the following:
+    //1. per-thread files are closed and reopened in the new folder
+    //2. strings are reported to the new folder
+    //3. domain paths are updated, so that any newly created files end up in the right place
+    //4. modules are reported to the new folder
+    //5. process info is written to the new trace
+
+    g_savepath = new_path;
+
+    for (__itt_global* pGlobal = GetITTGlobal(); pGlobal; pGlobal = pGlobal->next) {
+        ReportModule(pGlobal); //4. we moved to a new folder and need to report modules there
+
+        for (___itt_domain* pDomain = pGlobal->domain_list; pDomain; pDomain = pDomain->next) {
+            DomainExtra* pDomainExtra = reinterpret_cast<DomainExtra*>(pDomain->extra2);
+            if (pDomainExtra) {
+                pDomainExtra->strDomainPath = g_savepath.size() ? GetDir(g_savepath, Escape4Path(pDomain->nameA)) : ""; //3.
+                pDomainExtra->bHasDomainPath = !pDomainExtra->strDomainPath.empty();
+                for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; pThreadRecord = pThreadRecord->pNext) {
+                    if (g_savepath.size()) {
+                        pThreadRecord->bRemoveFiles = true; //1. on the next attempt to get a file it will recreate all files with the new paths
+                    } else {
+                        pThreadRecord->files.clear();
+                    }
+                }
+            }
+        }
+
+        if (g_savepath.size()) {
+            for (___itt_string_handle* pString = pGlobal->string_list; pString; pString = pString->next)
+                sea::ReportString(const_cast<__itt_string_handle *>(pString)); //2. make the string be reported again - into the new folder
+        }
+    }
+
+    if (g_savepath.size())
+        GetSEARecorder().Init(g_rfMainThread); //5.
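+    //an empty save path disables SEA file output below, a non-empty one (re)enables it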
+ + if (g_savepath.size()) + g_features |= sfSEA; + else + g_features &=~sfSEA; +} + +void SetRing(uint64_t nanoseconds) { + if (g_nRingBuffer == nanoseconds) + return; + g_nRingBuffer = nanoseconds; + TraverseThreadRecords([](SThreadRecord& record){ + record.bRemoveFiles = true; + }); +} + +#ifdef __linux__ + bool WriteFTraceTimeSyncMarkers() { + int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY); + if (-1 == fd) { + VerbosePrint("Warning: failed to access /sys/kernel/debug/tracing/trace_marker\n"); + return false; + } + for (size_t i = 0; i < 5; ++i) { + char buff[100] = {}; + int size = snprintf(buff, sizeof(buff), "IntelSEAPI_Time_Sync: %llu\n", (long long unsigned int)CTraceEventFormat::GetTimeNS()); + int res = write(fd, buff, (unsigned int)size); + if (-1 == res) return false; + } + close(fd); + return true; + } +#endif + +#ifdef __APPLE__ + bool WriteKTraceTimeSyncMarkers() { + for (size_t i = 0; i < 5; ++i) { + kdebug_signpost(APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA), CTraceEventFormat::GetTimeNS(), 0x15EA15EA, 0x15EA15EA, 0x15EA15EA); + syscall(SYS_kdebug_trace, APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA) | DBG_FUNC_NONE, CTraceEventFormat::GetTimeNS(), 0x15EA15EA, 0x15EA15EA, 0x15EA15EA); + } + return true; + } +#endif + +#ifdef _WIN32 + +typedef ULONG(__stdcall* TEtwNotificationRegister)( + LPCGUID Guid, + ULONG Type, + PVOID Callback, + PVOID Context, + REGHANDLE* RegHandle); + +TEtwNotificationRegister g_fnOrigEtwNotificationRegister = nullptr; + + ULONG _stdcall MyEtwNotificationRegister( + LPCGUID Guid, + ULONG Type, + PVOID Callback, + PVOID Context, + REGHANDLE* RegHandle) { + WCHAR strGuid[100] = {}; + StringFromGUID2(*Guid, strGuid, sizeof(strGuid) - 1); + char str[100] = {}; + sprintf_s(str, "%ls", strGuid); + VerbosePrint("\nEventRegister, provider: %s\n", str); + static __itt_string_handle* pKey = UNICODE_AGNOSTIC(string_handle_create)("EventRegister::Provider"); + WriteMeta(GetRegularFields(), pKey, str); + return g_fnOrigEtwNotificationRegister(Guid, Type, Callback, Context, RegHandle); +} +#endif + +void InitSEA() { + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + g_handlers[i]->Init(g_rfMainThread); + } +#ifdef __linux__ + WriteFTraceTimeSyncMarkers(); +#endif + + const char* relations[] = { + nullptr, + ("dependent_on"), /**< "A is dependent on B" means that A cannot start until B completes */ + ("sibling_of"), /**< "A is sibling of B" means that A and B were created as a group */ + ("parent_of"), /**< "A is parent of B" means that A created B */ + ("continuation_of"), /**< "A is continuation of B" means that A assumes the dependencies of B */ + ("child_of"), /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + ("continued_by"), /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + ("predecessor_to") /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ + }; + + size_t i = 0; + for (auto ptr : relations) + g_relations[i++] = ptr ? 
UNICODE_AGNOSTIC(string_handle_create)(ptr) : nullptr; + + + GetSEARecorder().Init(g_rfMainThread); + +#ifdef _WIN32 //adding information about process explicitly + ReportModule(GetModuleHandle(NULL)); +#else + //XXX ReportModule(dlopen(NULL, RTLD_LAZY)); +#endif +#if defined(_DEBUG) && defined(STANDARD_SOURCES) && 0 + void Test(); + Test(); +#endif +} + +void FinitaLaComedia() { + COverlapped::FinishAll(); + + for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { + delete g_handlers[i]; + g_handlers[i] = nullptr; + } + + { + CIttLocker locker; + if (sea::IsVerboseMode()) { + VerbosePrint("Call statistics:\n"); + const auto& map = CIttFnStat::GetStats(); + for (const auto& pair : map) { + VerbosePrint("%d\t%s\n", (int)pair.second, pair.first.c_str()); // NOLINT + } + } + + TraverseThreadRecords([](SThreadRecord& tr){tr.files.clear();}); + } +#ifdef __linux__ + WriteFTraceTimeSyncMarkers(); +#endif + + g_oDomainFilter.Finish(); +} +} //namespace sea + +extern "C" { + SEA_EXPORT void* itt_create_domain(const char* str) { + return UNICODE_AGNOSTIC(__itt_domain_create)(str); + } + SEA_EXPORT void* itt_create_string(const char* str) { + return UNICODE_AGNOSTIC(__itt_string_handle_create)(str); + } + SEA_EXPORT void itt_marker(void* domain, uint64_t id, void* name, int scope, uint64_t timestamp) { + __itt_marker_ex( + reinterpret_cast<__itt_domain*>(domain), + nullptr, //zero clock domain means that given time is already a correct timestamp + timestamp, + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + (__itt_scope)scope); + } + + SEA_EXPORT void itt_task_begin(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) { + __itt_task_begin_ex( + reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + id ? __itt_id_make(domain, id) : __itt_null, + parent ? __itt_id_make(domain, parent) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name)); + } + + SEA_EXPORT void itt_task_begin_overlapped(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) { + __itt_task_begin_overlapped_ex( + reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + __itt_id_make(domain, id), + parent ? __itt_id_make(domain, parent) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name)); + } + + + SEA_EXPORT void itt_metadata_add(void* domain, uint64_t id, void* name, double value) { + __itt_metadata_add( + reinterpret_cast<__itt_domain*>(domain), + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + __itt_metadata_double, 1, + &value); + } + + SEA_EXPORT void itt_metadata_add_str(void* domain, uint64_t id, void* name, const char* value) { + __itt_metadata_str_add( + reinterpret_cast<__itt_domain*>(domain), + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + value, + 0); + } + + SEA_EXPORT void itt_metadata_add_blob(void* domain, uint64_t id, void* name, const void* value, uint32_t size) { + __itt_metadata_add( + reinterpret_cast<__itt_domain*>(domain), + id ? 
__itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + __itt_metadata_unknown, size, + const_cast(value)); + } + + SEA_EXPORT void itt_task_end(void* domain, uint64_t timestamp) { + __itt_task_end_ex( + reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp); + } + + SEA_EXPORT void itt_task_end_overlapped(void* domain, uint64_t timestamp, uint64_t taskid) { + __itt_task_end_overlapped_ex( + reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + __itt_id_make(domain, taskid)); + } + + SEA_EXPORT void* itt_counter_create(void* domain, void* name) { + return __itt_counter_create_typed( + reinterpret_cast<__itt_string_handle*>(name)->strA, + reinterpret_cast<__itt_domain*>(domain)->nameA, + __itt_metadata_u64); + } + + SEA_EXPORT void itt_set_counter(void* id, double value, uint64_t timestamp) { + __itt_counter_set_value_ex(reinterpret_cast<__itt_counter>(id), nullptr, timestamp, &value); + } + + SEA_EXPORT void* itt_create_track(const char* group, const char* track) { + return __itt_track_create( + __itt_track_group_create(((group) ? __itt_string_handle_create(group) : nullptr), __itt_track_group_type_normal), + __itt_string_handle_create(track), + __itt_track_type_normal); + } + + SEA_EXPORT void itt_set_track(void* track) { + __itt_set_track(reinterpret_cast<__itt_track*>(track)); + } + + SEA_EXPORT uint64_t itt_get_timestamp() { + return (uint64_t)__itt_get_timestamp(); + } + + SEA_EXPORT void itt_write_time_sync_markers() { +#ifdef __linux__ + sea::WriteFTraceTimeSyncMarkers(); +#endif +#ifdef __APPLE__ + sea::WriteKTraceTimeSyncMarkers(); +#endif + } +}; diff --git a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h new file mode 100644 index 00000000000000..a50ef9a16c4114 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h @@ -0,0 +1,326 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#pragma once + +#define INTEL_ITTNOTIFY_API_PRIVATE + +#include "ittnotify.h" +#include "ittnotify_config.h" + +#ifdef _WIN32 + #define SEA_EXPORT __declspec(dllexport) + #define _sprintf sprintf_s +#else + #define SEA_EXPORT __attribute__ ((visibility ("default"))) + #define _sprintf sprintf +#endif + +namespace sea { + bool IsVerboseMode(); +} + +#if defined(_WIN32) + #define VerbosePrint(...) { \ + if (sea::IsVerboseMode()) { \ + std::vector buff(1024); \ + sprintf_s(buff.data(), 1024, __VA_ARGS__); \ + OutputDebugStringA(buff.data()); \ + printf("%s", buff.data()); \ + } \ + } +#else + #define VerbosePrint(...) { \ + if (sea::IsVerboseMode()) \ + printf(__VA_ARGS__); \ + } +#endif + +#include "Utils.h" +#include "TraceEventFormat.h" +#include +#include + + +__itt_global* GetITTGlobal(); +extern __itt_domain* g_pIntelSEAPIDomain; + + +namespace sea { +extern std::string g_savepath; +extern uint64_t g_nAutoCut; +#ifdef __linux +bool WriteFTraceTimeSyncMarkers(); //For Driver instrumentation see: http://lwn.net/Articles/379903/ +#endif +void InitSEA(); +void FillApiList(__itt_api_info* pApiInfo); +void FinitaLaComedia(); +void Counter(const __itt_domain *pDomain, + __itt_string_handle *pName, + double value, + __itt_clock_domain* clock_domain = nullptr, + unsigned long long timestamp = 0); +__itt_clock_domain* clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); +void SetCutName(const std::string& path); +void SetFolder(const std::string& path); +void SetRing(uint64_t nanoseconds); +const char* GetProcessName(bool bFullPath); +void FixCounter(__itt_counter_info_t* pCounter); +struct SModuleInfo { + void* base; + size_t size; + std::string path; +}; +SModuleInfo Fn2Mdl(void* fn); +std::string GetDir(std::string path, const std::string& append = ""); +} // namespace sea + +struct SDomainName { + __itt_domain *pDomain; + __itt_string_handle *pName; +}; + +struct ___itt_counter : public __itt_counter_info_t{}; + +#include +#define USE_PROBES + +#ifdef _WIN32 + #include "windows.h" + #include "IntelSEAPI.h" +#elif defined(__linux__) + #ifndef USE_PROBES + __thread FILE* stdsrc_trace_info_t::pFile = nullptr; + #endif +#endif + +#ifdef _WIN32 + #define UNICODE_AGNOSTIC(name) name##A + inline std::string W2L(const wchar_t* wstr) { + size_t len = lstrlenW(wstr); + char* dest = (char*)alloca(len + 2); // NOLINT + errno_t err = wcstombs_s(&len, dest, len + 1, wstr, len + 1); + return std::string(dest, dest + len); + } + + static_assert(sizeof(__itt_id) == 24, "sizeof(__itt_id) == 24"); + static_assert(sizeof(GUID) == 16, "sizeof(GUID) == 16"); + + union IdCaster { + __itt_id from; //d3 is not used, so we fit d1 and d2 into 16 bytes + GUID to; + }; + + 
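+    //signature of the native trace call referenced by the links below; it is used to write raw ETW event instances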
//http://www.geoffchappell.com/studies/windows/win32/ntdll/api/etw/eventwritefull.htm + //http://helpdoc-online.com/Microsoft_Platform_SDK_August_2001_Performance_Monitoring_en/Tracing_Event_Instances.php + typedef NTSYSAPI NTSTATUS (NTAPI * FZwTraceEvent)(IN ULONG TraceHandle, IN ULONG Flags, IN ULONG TraceHeaderLength, IN PEVENT_TRACE_HEADER TraceHeader); + +#else + #include + #define _strdup strdup + #define UNICODE_AGNOSTIC(name) name +#endif + +namespace sea { +__itt_counter UNICODE_AGNOSTIC(counter_create)(const char *name, const char *domain); +__itt_domain* UNICODE_AGNOSTIC(domain_create)(const char* name); +__itt_string_handle* ITTAPI UNICODE_AGNOSTIC(string_handle_create)(const char* name); + +enum SEAFeature { + sfSEA = 0x1, + sfSystrace = 0x2, + sfMetricsFrameworkPublisher = 0x4, + sfMetricsFrameworkConsumer = 0x8, + sfStack = 0x10, + sfConcurrencyVisualizer = 0x20, + sfRemotery = 0x40, + sfBrofiler = 0x80, + sfMemStat = 0x100, + sfMemCounters = 0x200, + sfRadTelemetry = 0x400, +}; + +uint64_t GetFeatureSet(); +CTraceEventFormat::SRegularFields GetRegularFields(__itt_clock_domain* clock_domain = nullptr, unsigned long long timestamp = 0); + +struct SThreadRecord; + +static const size_t MAX_HANDLERS = 10; + +struct STaskDescriptor { + STaskDescriptor* prev; + CTraceEventFormat::SRegularFields rf; + const __itt_domain *pDomain; + const __itt_string_handle *pName; + __itt_id id; + __itt_id parent; + void* fn; + struct SCookie { + void* pCookie; + void (*Deleter)(void*); + }; + SCookie cookies[MAX_HANDLERS]; + +#ifdef TURBO_MODE + uint64_t nMemCounter; + double *pDur; +#endif + + ~STaskDescriptor() { + for (size_t i = 0; i < MAX_HANDLERS; ++i) { + if (!cookies[i].pCookie) + continue; + cookies[i].Deleter(cookies[i].pCookie); + cookies[i].pCookie = nullptr; + } + } +}; + + +struct IHandler { +protected: + static bool RegisterHandler(IHandler* pHandler); + size_t m_cookie = ~0x0; + void SetCookieIndex(size_t cookie) { + m_cookie = cookie; + } + + template + T& Cookie(STaskDescriptor& oTask, TArgs&... args) { + if (!oTask.cookies[m_cookie].pCookie) { + struct SDeleter { + static void Deleter(void* ptr) { + placement_free(reinterpret_cast(ptr)); + } + }; + oTask.cookies[m_cookie] = STaskDescriptor::SCookie{placement_new(T)(args...), SDeleter::Deleter}; //consider placement new here! 
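+            //the stored Deleter releases this handler's cookie when the owning STaskDescriptor is destroyed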
+        }
+        return *reinterpret_cast<T*>(oTask.cookies[m_cookie].pCookie);
+    }
+
+    const char* GetScope(__itt_scope theScope) {
+        static const char * scopes[] = {
+            "unknown",
+            "global",
+            "track_group",
+            "track",
+            "task",
+            "marker"
+        };
+
+        return scopes[theScope];
+    }
+
+public:
+    struct SData {
+        CTraceEventFormat::SRegularFields rf;
+        SThreadRecord* pThreadRecord;
+        const __itt_domain *pDomain;
+        const __itt_id& taskid;
+        const __itt_id& parentid;
+        const __itt_string_handle *pName;
+    };
+
+    template<class T>
+    static T* Register(bool bRegister) {
+        T* pObject = nullptr;
+#ifndef _DEBUG //register all in debug to discover all problems sooner
+        if (bRegister) //NOLINT
+#endif
+        {
+            pObject = new T();
+            if (!RegisterHandler(pObject)) {
+                assert(false);
+                delete pObject;
+                return nullptr;
+            }
+        }
+        return pObject;
+    }
+
+    virtual void Init(const CTraceEventFormat::SRegularFields& main) {}
+    virtual void TaskBegin(STaskDescriptor& oTask, bool bOverlapped) {}
+    virtual void AddArg(STaskDescriptor& oTask, const __itt_string_handle *pKey, const char *data, size_t length) {}
+    virtual void AddArg(STaskDescriptor& oTask, const __itt_string_handle *pKey, double value) {}
+    virtual void AddRelation(const CTraceEventFormat::SRegularFields& rf,
+                             const __itt_domain *pDomain,
+                             __itt_id head,
+                             __itt_string_handle* relation,
+                             __itt_id tail) {}
+    virtual void TaskEnd(STaskDescriptor& oTask, const CTraceEventFormat::SRegularFields& rf, bool bOverlapped) {}
+    virtual void Marker(const CTraceEventFormat::SRegularFields& rf, const __itt_domain *pDomain, __itt_id id, __itt_string_handle *pName, __itt_scope scope) {}
+    virtual void CreateCounter(const __itt_counter& id) {}
+    virtual void Counter(const CTraceEventFormat::SRegularFields& rf, const __itt_domain *pDomain, const __itt_string_handle *pName, double value) {}
+    virtual void SetThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name) {}
+    virtual void Alloc(const CTraceEventFormat::SRegularFields& rf, const void* addr, size_t size, const char* domain, const char* name) {}
+    virtual void Free(const CTraceEventFormat::SRegularFields& rf, const void* addr, size_t size, const char* domain, const char* name) {}
+
+    virtual ~IHandler(){}
+};
+
+class COverlapped;
+
+struct SThreadRecord {
+    std::map files;
+    bool bRemoveFiles = false;
+    __itt_track* pTrack = nullptr;
+    SThreadRecord* pNext = nullptr;
+    STaskDescriptor* pTask = nullptr;
+    COverlapped* pOverlapped = nullptr;
+    bool bAllocRecursion = false;
+    void* pLastRecorder = nullptr;
+    const void* pLastDomain = nullptr;
+    int nSpeedupCounter = 0;
+#ifdef TURBO_MODE
+    uint64_t nMemMoveCounter = 0; //updated every time the memory window moves
+#endif // TURBO_MODE
+};
+
+void TraverseDomains(const std::function<void(___itt_domain&)>& callback);
+void TraverseThreadRecords(const std::function<void(SThreadRecord&)>& callback);
+
+
+void InitDomain(__itt_domain* pDomain);
+
+struct DomainExtra {
+    std::string strDomainPath; //always changed and accessed under lock
+    bool bHasDomainPath = false; //for a light check of strDomainPath.empty() without the lock
+    SThreadRecord* pThreadRecords = nullptr; //keeping track of thread records for later freeing
+    __itt_clock_domain* pClockDomain = nullptr;
+    __itt_track_group* pTrackGroup = nullptr;
+};
+
+SThreadRecord* GetThreadRecord();
+
+#define CHECKRET(cond, res) {if (!(cond)) {VerbosePrint("Error: !(%s) at %s, %s:(%d)\n", #cond, __FUNCTION__, __FILE__, __LINE__); return res;}}
+
+
+class CIttLocker {
+    __itt_global* m_pGlobal = nullptr;
+public:
+    CIttLocker();
+    ~CIttLocker();
+};
+
+#ifdef _WIN32
+    const
uint32_t FilePermissions = _S_IREAD|_S_IWRITE; //read by user, write by user +#else + const uint32_t FilePermissions = S_IRWXU | S_IRWXG | S_IRWXO; //read, write and execute by all +#endif + +} //namespace sea diff --git a/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp b/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp new file mode 100644 index 00000000000000..1f40dc55978236 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp @@ -0,0 +1,589 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#include "IttNotifyStdSrc.h" +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#include + + +#define open crossopen +#define write _write +#define close _close +int crossopen(_In_z_ const char * _Filename, _In_ int _Openflag, int perm) { + int fd = 0; + _sopen_s(&fd, _Filename, _Openflag|_O_BINARY, _SH_DENYWR, perm); + return fd; +} +//FIXME: support wide char mode +#endif + +CRecorder::CRecorder() + : m_pCurPos(nullptr) {} + +size_t ChunkSize = 1*1020*1024; + +bool CRecorder::Init(const std::string& path, uint64_t time, void* pCut) { + Close(true); + m_path = path; +#ifdef IN_MEMORY_RING + m_pCurPos = m_pAlloc = VirtualAlloc(nullptr, m_nBufferSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); +#else + m_memmap.reset(new CMemMap(path, ChunkSize)); + m_pCurPos = m_memmap->GetPtr(); +#endif + m_nWroteTotal = 0; + m_time = time; + ++m_counter; + m_pCut = pCut; + return !!m_pCurPos; +} + +size_t CRecorder::CheckCapacity(size_t size) { +#ifdef IN_MEMORY_RING + size_t nWroteBytes = (char*)m_pCurPos - (char*)m_pAlloc; // NOLINT + if (nWroteBytes + size > m_nBufferSize) { + if (m_pBackBuffer) + VirtualFree(m_pBackBuffer, 0, MEM_RELEASE); + m_nBufferSize *= 2; //We grow the buffer each time to accommodate needs + m_pBackBuffer = m_pAlloc; //back buffer will always be half of m_nBufferSize + m_nBackSize = nWroteBytes; + m_pCurPos = m_pAlloc = VirtualAlloc(nullptr, m_nBufferSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + sea::GetThreadRecord()->nMemMoveCounter += 1; + if (!m_pCurPos) + return 0; + } +#else + if (!m_memmap) + return 0; + size_t nWroteBytes = (char*)m_pCurPos - (char*)m_memmap->GetPtr(); // NOLINT + if (nWroteBytes + size > m_memmap->GetSize()) { + m_pCurPos = m_memmap->Remap((std::max)(ChunkSize, size), m_nWroteTotal); +#ifdef TURBO_MODE + sea::GetThreadRecord()->nMemMoveCounter += 1; +#endif + if (!m_pCurPos) + return 0; + } +#endif + return (std::max)(m_nWroteTotal, 1); +} + +void* CRecorder::Allocate(size_t size) { + //must be called only from one thread + void * pCurPos = m_pCurPos; + m_nWroteTotal += size; + m_pCurPos = (char*)m_pCurPos + size; // NOLINT + return pCurPos; +} + +void CRecorder::Close(bool bSave) { +#ifdef TURBO_MODE + sea::GetThreadRecord()->nMemMoveCounter += 1; +#endif +#ifdef IN_MEMORY_RING + if (bSave) { + int fd = open(m_path.c_str(), O_WRONLY | O_CREAT | O_EXCL, sea::FilePermissions); + int res = 0; + if (m_pBackBuffer) + res = write(fd, m_pBackBuffer, uint32_t(m_nBackSize)); + if (m_pAlloc) + res = write(fd, m_pAlloc, uint32_t((char*)m_pCurPos - (char*)m_pAlloc)); // NOLINT + close(fd); + } + if (m_pBackBuffer) + VirtualFree(m_pBackBuffer, 0, MEM_RELEASE); + if (m_pAlloc) + VirtualFree(m_pAlloc, 0, MEM_RELEASE); + m_pBackBuffer = m_pAlloc = nullptr; +#else // IN_MEMORY_RING + if (m_memmap) + m_memmap->Resize(m_nWroteTotal); + m_memmap.reset(); +#endif // IN_MEMORY_RING + m_pCurPos = nullptr; +} + +CRecorder::~CRecorder() { + Close(true); +} + +static_assert(sizeof(__itt_id) == 3*8, "sizeof(__itt_id) must be 3*8"); +static_assert(sizeof(CTraceEventFormat::SRegularFields().tid) == 8, "sizeof(tid) must be 8"); + +enum EFlags { + efHasId = 0x1, + efHasParent = 0x2, + efHasName = 0x4, + efHasTid = 0x8, + efHasData = 0x10, + efHasDelta = 0x20, + 
efHasFunction = 0x40, + efHasPid = 0x80, +}; + +#pragma pack(push, 1) +//File tree is pid/domain/tid (pid is one per dll instance) +struct STinyRecord { + uint64_t timestamp; + ERecordType ert; + uint8_t flags; //EFlags +}; +#pragma pack(pop) + +static_assert(sizeof(STinyRecord) == 10, "SRecord must fit in 10 bytes"); + +template +inline T* WriteToBuff(CRecorder& recorder, const T& value) { + T* ptr = (T*)recorder.Allocate(sizeof(T)); // NOLINT + if (ptr) + *ptr = value; + return ptr; +} + +namespace sea { + + extern int64_t g_nRingBuffer; + + extern std::shared_ptr g_spCutName; + + inline CRecorder* GetFile(const SRecord& record) { + DomainExtra* pDomainExtra = reinterpret_cast(record.domain.extra2); + if (!pDomainExtra || !pDomainExtra->bHasDomainPath) + return nullptr; + + static thread_local SThreadRecord* pThreadRecord = nullptr; + if (!pThreadRecord) + pThreadRecord = GetThreadRecord(); + + if (pThreadRecord->bRemoveFiles) { + pThreadRecord->pLastRecorder = nullptr; + pThreadRecord->pLastDomain = nullptr; + pThreadRecord->bRemoveFiles = false; + pThreadRecord->files.clear(); + } + //with very high probability the same thread will write into the same domain + if (pThreadRecord->pLastRecorder && (pThreadRecord->pLastDomain == record.domain.nameA) && (100 > pThreadRecord->nSpeedupCounter++)) + return reinterpret_cast(pThreadRecord->pLastRecorder); + pThreadRecord->nSpeedupCounter = 0; //we can't avoid checking ring size + pThreadRecord->pLastDomain = record.domain.nameA; + + auto it = pThreadRecord->files.find(record.domain.nameA); + CRecorder* pRecorder = nullptr; + if (it != pThreadRecord->files.end()) { + pRecorder = &it->second; + int64_t diff = record.rf.nanoseconds - pRecorder->GetCreationTime(); //timestamp can be in the past, it's ok + // just checking pointer of g_spCutName.get() is thread safe without any locks: we don't access internals. + // And if it's the same we work with the old path. + // but if it's changed we will lock and access the value below + bool bSameCut = pRecorder->SameCut(g_spCutName.get()); + if (bSameCut && (!g_nRingBuffer || (diff < g_nRingBuffer))) { + pThreadRecord->pLastRecorder = pRecorder; + return pRecorder; //normal flow + } + pRecorder->Close(!bSameCut); //time to create new file + } + + if (!pRecorder) { + pRecorder = &pThreadRecord->files[record.domain.nameA]; + } + CIttLocker lock; //locking only on file creation + //this is theoretically possible because we check pDomainExtra->bHasDomainPath without lock above + if (pDomainExtra->strDomainPath.empty()) { + pThreadRecord->pLastRecorder = nullptr; + return nullptr; + } + std::shared_ptr spCutName = g_spCutName; + + CTraceEventFormat::SRegularFields rf = CTraceEventFormat::GetRegularFields(); + char path[1024] = {}; + _sprintf(path, "%s%llu%s%s.sea", + pDomainExtra->strDomainPath.c_str(), + (unsigned long long)rf.tid, + spCutName ? (std::string("!") + *spCutName).c_str() : "", + (g_nRingBuffer ? ((pRecorder->GetCount() % 2) ? 
"-1" : "-0") : "")); + try { + VerbosePrint("Opening: %s\n", path); + if (!pRecorder->Init(path, rf.nanoseconds, spCutName.get())) { + VerbosePrint("Failed to init recorder\n"); + pThreadRecord->files.erase(record.domain.nameA); + pRecorder = nullptr; + } + } catch (const std::exception& exc) { + VerbosePrint("Exception: %s\n", exc.what()); + pThreadRecord->files.erase(record.domain.nameA); + pRecorder = nullptr; + } + pThreadRecord->pLastRecorder = pRecorder; + return pRecorder; + } +} // namespace sea + +double* WriteRecord(ERecordType type, const SRecord& record) { + CRecorder* pFile = sea::GetFile(record); + if (!pFile) return nullptr; + + CRecorder& stream = *pFile; + + const size_t MaxSize = sizeof(STinyRecord) + 2*sizeof(__itt_id) + 3*sizeof(uint64_t) + sizeof(double) + sizeof(void*); + size_t size = stream.CheckCapacity(MaxSize + record.length); + if (!size) + return nullptr; + + STinyRecord* pRecord = WriteToBuff(stream, STinyRecord{record.rf.nanoseconds, type}); + if (!pRecord) return nullptr; + + struct ShortId { unsigned long long a, b; }; + if (record.taskid.d1) { + WriteToBuff(stream, *(ShortId*)&record.taskid); // NOLINT + pRecord->flags |= efHasId; + } + + if (record.parentid.d1) { + WriteToBuff(stream, *(ShortId*)&record.parentid); // NOLINT + pRecord->flags |= efHasParent; + } + + if (record.pName) { + WriteToBuff(stream, (uint64_t)record.pName); + pRecord->flags |= efHasName; + } + + if ((long long)record.rf.tid < 0) { + WriteToBuff(stream, record.rf.tid); + pRecord->flags |= efHasTid; + } + + if (record.pData) { + WriteToBuff(stream, (uint64_t)record.length); + + void* ptr = stream.Allocate(record.length); + memcpy(ptr, record.pData, (unsigned int)record.length); + + pRecord->flags |= efHasData; + } + + double* pDelta = nullptr; + if (record.pDelta) { + pDelta = WriteToBuff(stream, *record.pDelta); + pRecord->flags |= efHasDelta; + } + + if (record.function) { + WriteToBuff(stream, (uint64_t)record.function); + pRecord->flags |= efHasFunction; + } + + if ((long long)record.rf.pid < 0) { + WriteToBuff(stream, record.rf.pid); + pRecord->flags |= efHasPid; + } + + if (sea::g_nAutoCut && (size >= sea::g_nAutoCut)) { + static size_t autocut = 0; + sea::SetCutName(std::string("autocut#") + std::to_string(autocut++)); + } + + return pDelta; +} + +CMemMap::CMemMap(const std::string &path, size_t size, size_t offset) { +#ifdef _WIN32 + m_hFile = CreateFile(path.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, + CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_SEQUENTIAL_SCAN, NULL); + if (INVALID_HANDLE_VALUE == m_hFile) { + m_hFile = NULL; + throw std::runtime_error("Failed to open file: " + path + " err=" + std::to_string(GetLastError())); + } +#else + m_fdin = open(path.c_str(), O_CREAT|O_TRUNC|O_RDWR, sea::FilePermissions); + if (-1 == m_fdin) { + m_fdin = 0; + throw std::runtime_error("Failed to open file: " + path + " err=" + std::to_string(errno)); + } +#endif + Remap(size, offset); +} + +void* CMemMap::Remap(size_t size, size_t offset) { + Resize(size + offset); + static const size_t PageSize = GetMemPageSize(); + size_t nRoundOffset = offset / PageSize * PageSize; //align by memory page size + m_size = size + offset % PageSize; +#ifdef _WIN32 + m_hMapping = CreateFileMapping(m_hFile, NULL, PAGE_READWRITE, 0, 0, NULL); + ULARGE_INTEGER uliOffset = {}; + uliOffset.QuadPart = nRoundOffset; + m_pView = ::MapViewOfFile(m_hMapping, FILE_MAP_WRITE, uliOffset.HighPart, uliOffset.LowPart, m_size); +#else + m_pView = mmap(0, m_size, PROT_READ|PROT_WRITE, 
MAP_SHARED, m_fdin, nRoundOffset); + if (m_pView == MAP_FAILED) + throw std::runtime_error("Failed to map file: err=" + std::to_string(errno)); + +#endif + return (char*)m_pView + offset % PageSize; // NOLINT +} + +void CMemMap::Unmap() { +#ifdef _WIN32 + if (m_pView) { + UnmapViewOfFile(m_pView); + m_pView = nullptr; + } + if (m_hMapping) { + CloseHandle(m_hMapping); + m_hMapping = nullptr; + } +#else + if (m_pView) { + munmap(m_pView, m_size); + m_pView = nullptr; + } +#endif +} + +bool CMemMap::Resize(size_t size) { + Unmap(); +#ifdef _WIN32 + //resize + LARGE_INTEGER liSize = {}; + liSize.QuadPart = size; + return SetFilePointerEx(m_hFile, liSize, nullptr, FILE_BEGIN) && ::SetEndOfFile(m_hFile); +#else + return 0 == ftruncate(m_fdin, size); +#endif +} + +CMemMap::~CMemMap() { + Unmap(); +#ifdef _WIN32 + if (m_hMapping) { + CloseHandle(m_hMapping); + } + if (m_hFile) { + CloseHandle(m_hFile); + } +#else + if (m_fdin) { + close(m_fdin); + } +#endif +} + +using namespace sea; +const bool g_bWithStacks = !!(GetFeatureSet() & sfStack); + + +void WriteMeta(const CTraceEventFormat::SRegularFields& main, __itt_string_handle* pKey, const char* name, double* pDelta) { + WriteRecord(ERecordType::Metadata, SRecord{ main, *g_pIntelSEAPIDomain, __itt_null, __itt_null, pKey, pDelta, name, strlen(name) }); +} + + +class CSEARecorder: public IHandler { + void Init(const CTraceEventFormat::SRegularFields& main) override { + //write process name into trace + __itt_string_handle* pKey = UNICODE_AGNOSTIC(string_handle_create)("__process__"); + const char * name = GetProcessName(true); + + double delta = -1;//sort order - highest for processes written thru SEA + WriteMeta(main, pKey, name, &delta); + + if (!g_savepath.empty()) { + std::ofstream ss(GetDir(g_savepath) + "process.dct"); + ss << "{"; + ss << "'time_freq':" << GetTimeFreq(); +#if INTPTR_MAX == INT64_MAX + ss << ", 'bits':64"; +#else + ss << ", 'bits':32"; +#endif + ss << "}"; + } + } + + void TaskBegin(STaskDescriptor& oTask, bool bOverlapped) override { + const char *pData = nullptr; + size_t length = 0; + if (g_bWithStacks) { + static thread_local TStack* pStack = nullptr; + if (!pStack) + pStack = (TStack*)malloc(sizeof(TStack)); // NOLINT + length = (GetStack(*pStack) - 2) * sizeof(void*); + pData = reinterpret_cast(&(*pStack)[2]); + } +#ifdef TURBO_MODE + double duration = 0; + oTask.pDur = WriteRecord(bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, SRecord { + oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, &duration, pData, length, oTask.fn }); + oTask.nMemCounter = GetThreadRecord()->nMemMoveCounter; +#else + WriteRecord(bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, SRecord { + oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, pData, length, oTask.fn }); +#endif + } + + void AddArg(STaskDescriptor& oTask, const __itt_string_handle *pKey, const char *data, size_t length) override { + WriteRecord(ERecordType::Metadata, SRecord{oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, nullptr, data, length}); +#ifdef TURBO_MODE + oTask.pDur = nullptr; //for now we don't support turbo tasks with arguments. But if count of arguments was saved it could work. 
+#endif + } + + void AddArg(STaskDescriptor& oTask, const __itt_string_handle *pKey, double value) override { + WriteRecord(ERecordType::Metadata, SRecord{ oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, &value}); +#ifdef TURBO_MODE + oTask.pDur = nullptr; //for now we don't support turbo tasks with arguments. But if count of arguments was saved it could work. +#endif + } + + void AddRelation(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain *pDomain, + __itt_id head, + __itt_string_handle* relation, + __itt_id tail) override { + WriteRecord(ERecordType::Relation, SRecord{ rf, *pDomain, head, tail, relation}); + } + + void TaskEnd(STaskDescriptor& oTask, const CTraceEventFormat::SRegularFields& rf, bool bOverlapped) override { +#ifdef TURBO_MODE + if (oTask.pDur && (oTask.nMemCounter == GetThreadRecord()->nMemMoveCounter)) + *oTask.pDur = double(rf.nanoseconds - oTask.rf.nanoseconds); // NOLINT + else + WriteRecord(bOverlapped ? ERecordType::EndOverlappedTask : ERecordType::EndTask, SRecord { + rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, nullptr, 0, oTask.fn }); +#else + WriteRecord(bOverlapped ? ERecordType::EndOverlappedTask : ERecordType::EndTask, SRecord{rf, *oTask.pDomain, oTask.id, __itt_null}); +#endif + } + + void Marker(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain *pDomain, + __itt_id id, + __itt_string_handle *pName, + __itt_scope theScope) override { + const char* scope = GetScope(theScope); + WriteRecord(ERecordType::Marker, SRecord{rf, *pDomain, id, __itt_null, pName, nullptr, scope, strlen(scope)}); + } + + void Counter(const CTraceEventFormat::SRegularFields& rf, const __itt_domain *pDomain, const __itt_string_handle *pName, double value) override { + const char *pData = nullptr; + size_t length = 0; + if (g_bWithStacks) { + static thread_local TStack* pStack = nullptr; + if (!pStack) + pStack = (TStack*)malloc(sizeof(TStack)); // NOLINT + length = (GetStack(*pStack) - 3) * sizeof(void*); + pData = reinterpret_cast(&(*pStack)[3]); + } + WriteRecord(ERecordType::Counter, SRecord{rf, *pDomain, __itt_null, __itt_null, pName, &value, pData, length}); + } + + void SetThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name) override { + WriteThreadName(rf, name); + } +}* g_pSEARecorder = IHandler::Register(true); + +IHandler& GetSEARecorder() { + return *g_pSEARecorder; +} + +namespace sea { + +bool WriteThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name) { + CIttLocker lock; + if (g_savepath.empty()) return true; + std::string path = g_savepath + "/"; + path += std::to_string(rf.pid) + "," + std::to_string(rf.tid) + ".tid"; + int fd = open(path.c_str(), O_WRONLY|O_CREAT|O_EXCL, FilePermissions); + if (-1 == fd) return true; //file already exists, other thread was faster + int res = write(fd, name, (unsigned int)strlen(name)); + close(fd); + return res != -1; +} + +bool WriteGroupName(int64_t pid, const char* name) { + if (g_savepath.empty()) return true; + std::string path = g_savepath + "/"; + path += std::to_string(pid) + ".pid"; + int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL, FilePermissions); + if (-1 == fd) return true; //file already exists, other thread was faster + int res = write(fd, name, (unsigned int)strlen(name)); + close(fd); + return res != -1; +} + +bool ReportString(__itt_string_handle* pStr) { + if (g_savepath.empty()) return true; + std::string path = g_savepath + "/"; + path += std::to_string((uint64_t)pStr) + ".str"; + int fd = open(path.c_str(), 
O_WRONLY|O_CREAT|O_EXCL, FilePermissions); + if (-1 == fd) return true; //file already exists, other thread was faster + int res = write(fd, pStr->strA, (unsigned int)strlen(pStr->strA)); + close(fd); + return res != -1; +} + +bool ReportModule(void* fn) { + if (g_savepath.empty()) + return true; + + SModuleInfo module_info = Fn2Mdl(fn); + + std::string path = GetDir(g_savepath) + std::to_string((uint64_t)module_info.base) + ".mdl"; + int fd = open(path.c_str(), O_WRONLY|O_CREAT|O_EXCL, FilePermissions); + if (-1 == fd) return true; //file already exists + std::string text = module_info.path + " " + std::to_string(module_info.size); + int res = write(fd, text.c_str(), (unsigned int)text.size()); + close(fd); + return res != -1; +} + +int g_jit_fd = 0; + +bool InitJit() { + std::string path = GetDir(g_savepath) + "/data.jit"; + g_jit_fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL, FilePermissions); + return -1 != g_jit_fd; +} + +bool WriteJit(const void* buff, size_t size) { + return -1 != write(g_jit_fd, buff, (unsigned int)size); +} + +int g_mem_fd = 0; + +bool InitMemStat() { + std::string path = GetDir(g_savepath) + "stat.mem"; + g_mem_fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL, FilePermissions); + return -1 != g_mem_fd; +} + +bool WriteMemStat(const void* buff, size_t size) { + if (g_mem_fd > -1) + return -1 != write(g_mem_fd, buff, (unsigned int)size); + else + return false; +} + +} //namespace sea diff --git a/thirdparty/itt_collector/sea_itt_lib/Recorder.h b/thirdparty/itt_collector/sea_itt_lib/Recorder.h new file mode 100644 index 00000000000000..674325efde140f --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/Recorder.h @@ -0,0 +1,155 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#pragma once + +//#define TURBO_MODE + +#ifdef _WIN32 + #include +#else + #include + #include + #include + #include + #include + #include +#endif +#include +#include "IttNotifyStdSrc.h" +#include "ittnotify.h" + +inline size_t GetMemPageSize() { +#ifdef _WIN32 + SYSTEM_INFO si = {}; + GetSystemInfo(&si); + return si.dwAllocationGranularity; +#else + return sysconf(_SC_PAGE_SIZE); +#endif +} + +class CMemMap { + CMemMap(const CMemMap&) = delete; + CMemMap& operator = (const CMemMap&) = delete; + +public: + CMemMap(const std::string& path, size_t size, size_t offset = 0); + + void* Remap(size_t size, size_t offset = 0); + + void* GetPtr() { + return m_pView; + } + size_t GetSize() { + return m_size; + } + + void Unmap(); + + bool Resize(size_t size); + + ~CMemMap(); + +protected: +#ifdef _WIN32 + HANDLE m_hFile = nullptr; + HANDLE m_hMapping = nullptr; +#else + int m_fdin = 0; +#endif + size_t m_size = 0; + void* m_pView = nullptr; +}; + +class CRecorder { + CRecorder(const CRecorder&) = delete; + CRecorder& operator = (const CRecorder&) = delete; + +public: + CRecorder(); + bool Init(const std::string& path, uint64_t time, void* pCut); + size_t CheckCapacity(size_t size); + void* Allocate(size_t size); + uint64_t GetCount() { return m_counter; } + uint64_t GetCreationTime() { return m_time; } + void Close(bool bSave); + inline bool SameCut(void* pCut) { return pCut == m_pCut; } + ~CRecorder(); + +protected: +#ifdef IN_MEMORY_RING + size_t m_nBufferSize = 1024 * 1024; + void* m_pAlloc = nullptr; + size_t m_nBackSize = 0; + void* m_pBackBuffer = nullptr; +#endif + std::string m_path; + + std::unique_ptr m_memmap; + size_t m_nWroteTotal = 0; + void* m_pCurPos = nullptr; + uint64_t m_time = 0; + uint64_t m_counter = 0; + void* m_pCut = nullptr; +}; + + +enum class ERecordType: uint8_t { + BeginTask, + EndTask, + BeginOverlappedTask, + EndOverlappedTask, + Metadata, + Marker, + Counter, + BeginFrame, + EndFrame, + ObjectNew, + ObjectSnapshot, + ObjectDelete, + Relation +}; + +struct SRecord { + const CTraceEventFormat::SRegularFields& rf; + const __itt_domain& domain; + const __itt_id& taskid; + const __itt_id& parentid; + const __itt_string_handle *pName; + double* pDelta; + const char *pData; + size_t length; + void* function; +}; +double* WriteRecord(ERecordType type, const SRecord& record); +void WriteMeta(const CTraceEventFormat::SRegularFields& main, __itt_string_handle* pKey, const char* name, double* pDelta = nullptr); + +namespace sea { + struct IHandler; + bool WriteThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name); + bool WriteGroupName(int64_t pid, const char* name); + bool ReportString(__itt_string_handle* pStr); + bool ReportModule(void* fn); + bool 
InitJit(); + bool WriteJit(const void* buff, size_t size); + bool InitMemStat(); + bool WriteMemStat(const void* buff, size_t size); +} // namespace sea + +sea::IHandler& GetSEARecorder(); diff --git a/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h b/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h new file mode 100644 index 00000000000000..48735a803f0bb7 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h @@ -0,0 +1,153 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#pragma once + +#include "Utils.h" + +#ifndef _WIN32 + #include + #include +#endif + +#ifdef _WIN32 + static const int64_t g_PID = (int64_t)GetCurrentProcessId(); +#else + static const int64_t g_PID = (int64_t)getpid(); + #if defined(__APPLE__) + inline int64_t GetTidFromPThread() { + uint64_t tid64 = 0; + pthread_threadid_np(NULL, &tid64); + return (int64_t)tid64; + } + #endif +#endif + +// https://github.com/google/trace-viewer +// For ETW see here: +// http://git.chromium.org/gitweb/?p=chromium/src.git;a=commitdiff;h=41fabf8e2dd3a847cbdad05da9b43fd9a99d741a +// (content/browser/tracing/etw_system_event_consumer_win.cc) +// parser source: https://github.com/google/trace-viewer/blob/49d0dd94c3925c3721d059ad3ee2db51d176248c/trace_viewer/extras/importer/trace_event_importer.html +class CTraceEventFormat { +public: + struct SRegularFields { + int64_t pid; + int64_t tid; + uint64_t nanoseconds; + enum EChanged { + ecNothing = 0x0, + ecPid = 0x1, + ecTid = 0x2, + ecTime = 0x4, + }; + uint64_t changed; + }; + + enum EventPhase { + Begin = 'B', //name, pid, tid, ts + End = 'E', //name, pid, tid, ts + Complete = 'X', //name, pid, tid, ts, dur + Instant = 'i', //name, pid, tid, ts, s = (g, p, t) //vertical line + Counter = 'C', //name, pid, tid, ts //"args": {"cats": 0, "dogs": 7} + AsyncBegin = 'b', //name, pid, tid, ts, id + AsyncInstant = 'n', //name, pid, tid, ts, id + AsyncEnd = 'e', //name, pid, tid, ts, id + //'S', 'T', 'F', + //'s', 't', 'f', //Flow events, with arrows: cool but unclear + FlowStart = 's', + FlowInstant = 't', + FlowFinish = 'f', + Metadata = 'M', + Sample = 'P', //pid, tid, ts + ObjectNew = 'N', //name, pid, tid, ts, id but no args! + ObjectDelete = 'D', //name, pid, tid, ts, id but no args! + ObjectSnapshot = 'O', //name, pid, tid, ts, id, can have args! See snapshot.basetype for deeper. + }; + + static uint64_t GetTimeNS() { +#ifdef _WIN32 + return SHiResClock::now64(); //in nanoseconds +#elif defined(__linux__) + static struct timespec res = {}; + if (!res.tv_nsec && !res.tv_sec) { + clock_getres(CLOCK_MONOTONIC_RAW, &res); + if (!res.tv_nsec && !res.tv_sec) { + VerbosePrint("Can't get CLOCK_MONOTONIC_RAW\n"); + return 0; + } + } + struct timespec ts = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return uint64_t((1000000000. * ts.tv_sec + ts.tv_nsec) / (1000000000. * res.tv_sec + res.tv_nsec)); +#else // FIXME: use mach_absolute_time for APPLE + using namespace std::chrono; + return (uint64_t)duration_cast(SHiResClock::now().time_since_epoch()).count(); +#endif + } + + static SRegularFields GetRegularFields() { + return SRegularFields{ + #if defined(_WIN32) + g_PID, (int64_t)GetCurrentThreadId(), + #elif defined(__linux__) + g_PID, (int64_t)syscall(SYS_gettid), + #elif defined(__APPLE__) + g_PID, GetTidFromPThread(), + #else + g_PID, (int64_t)syscall(SYS_thread_selfid), + #endif + GetTimeNS() + }; + } + + class CArgs { + protected: + typedef std::map TMap; + TMap m_args; + + public: + CArgs() {} + template + CArgs(const std::string& name, const T& value) { + Add(name, value); + } + CArgs& Add(const std::string& name, const char* value) { + m_args[name] = value ? 
value : ""; + return *this; + } + template + CArgs& Add(const std::string& name, const T& value) { + m_args[name] = std::to_string(value); + return *this; + } + operator bool() const { return !m_args.empty(); } + + + std::string Str() const { + std::string res; + for (const auto& pair : m_args) { + if (!res.empty()) + res += ";"; + res += pair.first + "=" + pair.second; + } + return res; + } + const TMap& GetMap() const {return m_args;} + }; +}; + diff --git a/thirdparty/itt_collector/sea_itt_lib/Utils.cpp b/thirdparty/itt_collector/sea_itt_lib/Utils.cpp new file mode 100644 index 00000000000000..f8504ae553fd89 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/Utils.cpp @@ -0,0 +1,229 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel(R) Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#include "Utils.h" +#include "IttNotifyStdSrc.h" +#include + +#ifdef _WIN32 + #include + #undef API_VERSION + #include +#else + #include + #include + + #include +#endif + +#ifdef __APPLE__ + #include +#endif + +#if defined(ARM32) + #define NO_DL_ITERATE_PHDR +#endif + +#if !defined(NO_DL_ITERATE_PHDR) && defined(__linux__) + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif + #include +#endif + + +size_t GetStack(TStack& stack) { +#ifdef _WIN32 + typedef USHORT (WINAPI *FCaptureStackBackTrace)(__in ULONG, __in ULONG, __out PVOID*, __out_opt PULONG); + static FCaptureStackBackTrace CaptureStackBackTrace = (FCaptureStackBackTrace)(GetProcAddress(LoadLibraryA("kernel32.dll"), "RtlCaptureStackBackTrace")); + return CaptureStackBackTrace ? CaptureStackBackTrace(0, StackSize, stack, NULL) : 0; +#else + return backtrace(stack, StackSize); +#endif +} + +std::string GetStackString() { +#ifdef _WIN32 + return std::string(); +#else + TStack stack = {}; + size_t size = GetStack(stack); + + char **bt_syms = backtrace_symbols(stack, size); + if (!bt_syms) + return std::string(); + std::string res; + for (int i = 2; i < size; i++) { + if (res.size()) + res += "<-"; + res += bt_syms[i]; + } + + free(bt_syms); + return res; +#endif +} + +namespace sea { + +#ifdef _WIN32 +const char* GetProcessName(bool bFullPath) { + assert(bFullPath); + static char process_name[1024] = {}; + if (!process_name[0]) + GetModuleFileNameA(NULL, process_name, sizeof(process_name) - 1); + return process_name; +} + +SModuleInfo Fn2Mdl(void* fn) { + HMODULE hModule = NULL; + GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCTSTR)fn, &hModule); + char filename[1024] = {}; + GetModuleFileNameA(hModule, filename, sizeof(filename) - 1); + MODULEINFO mi = {}; + GetModuleInformation(GetCurrentProcess(), hModule, &mi, sizeof(MODULEINFO)); + return SModuleInfo{hModule, mi.SizeOfImage, filename}; +} + +LONG WINAPI CreateMiniDump(EXCEPTION_POINTERS* pep) { + typedef BOOL(WINAPI *PDUMPFN)( + HANDLE hProcess, + DWORD ProcessId, + HANDLE hFile, + MINIDUMP_TYPE DumpType, + PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam, + PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam, + PMINIDUMP_CALLBACK_INFORMATION CallbackParam); + + PDUMPFN fnMiniDumpWriteDump = (PDUMPFN)GetProcAddress(::LoadLibraryA("DbgHelp.dll"), "MiniDumpWriteDump"); + if (!fnMiniDumpWriteDump) return EXCEPTION_EXECUTE_HANDLER; + std::string path = g_savepath.empty() ? "c:/temp" : g_savepath; + path += "/isea_minidump.dmp"; + HANDLE hFile = CreateFileA(path.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + if (!hFile || INVALID_HANDLE_VALUE == hFile) return EXCEPTION_EXECUTE_HANDLER; + + MINIDUMP_EXCEPTION_INFORMATION mdei = {}; + mdei.ThreadId = GetCurrentThreadId(); + mdei.ExceptionPointers = pep; + mdei.ClientPointers = TRUE; + + + fnMiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, (pep != 0) ? 
&mdei : 0, 0, 0); + CloseHandle(hFile); + + return EXCEPTION_EXECUTE_HANDLER; +} + +void SetGlobalCrashHandler() { + ::SetUnhandledExceptionFilter(CreateMiniDump); +} + +#else + +void SetGlobalCrashHandler() { + //FIXME: implement +} + +#include + +size_t GetFileSize(const char *path) { + struct stat st = {}; + + if (0 == stat(path, &st)) + return st.st_size; + + return -1; +} + +#ifndef __APPLE__ + +#if !defined(NO_DL_ITERATE_PHDR) +int iterate_callback(struct dl_phdr_info *info, size_t size, void *data) { + Dl_info* pInfo = reinterpret_cast(data); + VerbosePrint("iterate_callback: %lx, %s\n", (long int)info->dlpi_addr, info->dlpi_name); + if (reinterpret_cast(info->dlpi_addr) == pInfo->dli_fbase) + pInfo->dli_fname = strdup(info->dlpi_name); + return 0; +} +#endif + +bool proc_self_map(Dl_info& info) { + char base[100] = {}; + snprintf(base, sizeof(base), "%lx", (long int)info.dli_fbase); + VerbosePrint("Base: %s\n", base); + std::ifstream input("/proc/self/maps"); + std::string line; + while (std::getline(input, line)) { + VerbosePrint("/proc/self/maps: %s\n", line.c_str()); + if (0 == line.find(base)) { + size_t pos = line.rfind(' '); + info.dli_fname = strdup(line.substr(pos + 1).c_str()); + return true; + } + } + return false; +} +#endif + +sea::SModuleInfo Fn2Mdl(void* fn) { + Dl_info dl_info = {}; + dladdr(fn, &dl_info); + VerbosePrint("Fn2Mdl: %p, %s\n", dl_info.dli_fbase, dl_info.dli_fname); + if (!dl_info.dli_fname || !strstr(dl_info.dli_fname, ".so")) { +#ifndef __APPLE__ + #if !defined(NO_DL_ITERATE_PHDR) + dl_iterate_phdr(iterate_callback, &dl_info); + #endif + if (!dl_info.dli_fname || !strstr(dl_info.dli_fname, ".so")) + proc_self_map(dl_info); +#endif + return SModuleInfo{dl_info.dli_fbase, 0, dl_info.dli_fname}; + } + + if (dl_info.dli_fname[0] == '/') { + // path is absolute + return SModuleInfo{dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; + } else { + if (const char * absolute = realpath(dl_info.dli_fname, nullptr)) { + SModuleInfo mdlInfo{dl_info.dli_fbase, GetFileSize(absolute), absolute}; + free((void*) absolute); // NOLINT + return mdlInfo; + } else { + return SModuleInfo{dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; + } + } +} + +const char* GetProcessName(bool bFullPath) { + static char process_name[1024] = {}; +#ifdef __APPLE__ + uint32_t size = 1023; + _NSGetExecutablePath(process_name, &size); +#else + if (!process_name[0]) + process_name[readlink("/proc/self/exe", process_name, sizeof(process_name)/sizeof(process_name[0]) - 1 )] = 0; +#endif //__APPLE__ + if (bFullPath) return process_name; + return strrchr(process_name, '/') + 1; +} + +#endif + +} //namespace sea + + diff --git a/thirdparty/itt_collector/sea_itt_lib/Utils.h b/thirdparty/itt_collector/sea_itt_lib/Utils.h new file mode 100644 index 00000000000000..ee7dd2b427f511 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/Utils.h @@ -0,0 +1,215 @@ +/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel(R) Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__arm__) && !defined(__aarch64__) + #define ARM32 +#endif + +#ifdef _WIN32 + #include +#else + #include + #include + #include +#endif + +static std::string get_environ_value(const std::string& name) { +#ifdef _WIN32 + size_t sz; + char *v = NULL; + _dupenv_s(&v, &sz, name.c_str()); + + std::string ret = v ? v : ""; + free(v); + + return ret; +#else + const char *v = getenv(name.c_str()); + return v ? v : ""; +#endif +} + +#ifdef _WIN32 + + //there is bug in VS2012 implementation: high_resolution_clock is in fact not high res... 
+ struct SHiResClock { + typedef uint64_t rep; + typedef std::nano period; + typedef std::chrono::duration duration; + typedef std::chrono::time_point time_point; + static const bool is_steady = true; + static uint64_t now64() { + static long long frequency = 0; + if (!frequency) { + QueryPerformanceFrequency(reinterpret_cast(&frequency)); + } + + LARGE_INTEGER count = {}; + QueryPerformanceCounter(&count); + return static_cast(static_cast(count.QuadPart) / frequency * static_cast(period::den)); + } + static time_point now() { + return time_point(duration(now64())); + } + }; + + namespace sea { + inline uint64_t GetTime() { + LARGE_INTEGER count = {}; + QueryPerformanceCounter(&count); + return count.QuadPart; + } + inline uint64_t GetTimeFreq() { + static LARGE_INTEGER frequency = {}; + if (!frequency.QuadPart) { + QueryPerformanceFrequency(&frequency); + } + return frequency.QuadPart; + } + } // namespace sea + +#else + + typedef std::chrono::high_resolution_clock SHiResClock; + namespace sea { + using namespace std::chrono; + inline uint64_t GetTime() { + return (uint64_t)duration_cast(SHiResClock::now().time_since_epoch()).count(); + } + inline uint64_t GetTimeFreq() { + /* + TODO: + struct timespec res = {}; + clock_getres(CLOCK_MONOTONIC_RAW, &res); + uint64_t freq = 1000000000ULL * (uint64_t)res.tv_sec + (uint64_t)res.tv_nsec; + */ + static uint64_t freq = SHiResClock::period::num / SHiResClock::period::den; + return freq; + } + } // namespace sea +#endif + +#ifdef _MSC_VER //std::mutex won't work in static constructors due to MS bug + class CCriticalSection { + CRITICAL_SECTION m_cs; + public: + CCriticalSection() { + InitializeCriticalSection(&m_cs); + } + void lock() { + EnterCriticalSection(&m_cs); + } + void unlock() { + LeaveCriticalSection(&m_cs); + } + ~CCriticalSection() { + DeleteCriticalSection(&m_cs); + } + }; + typedef CCriticalSection TCritSec; +#else + typedef std::recursive_mutex TCritSec; +#endif + +#ifdef _MSC_VER + #define thread_local __declspec(thread) +#else + #define thread_local __thread +#endif + +template +class CPlacementPool { + static CPlacementPool& GetPool() { + static thread_local CPlacementPool* pPool = nullptr; + if (!pPool) + pPool = new CPlacementPool; + return *pPool; + } + + void* AllocMem() { + if (m_free.size()) { + void* ptr = m_free.back(); + m_free.pop_back(); + return ptr; + } + return malloc(size); + } + + void FreeMem(void* ptr) { + m_free.push_back(ptr); + } + + std::vector m_free; + +public: + static void* Alloc() { + return GetPool().AllocMem(); + } + + template + static void Free(T* ptr) { + if (!ptr) return; + ptr->~T(); + return GetPool().FreeMem(ptr); + } + ~CPlacementPool() { + for (void* ptr : m_free) { + free(ptr); + } + } +}; + +#define placement_new(T) new (CPlacementPool::Alloc()) T +template +inline void placement_free(T* ptr) { + CPlacementPool::Free(ptr); +} + +class CScope { +protected: + std::function m_fn; +public: + CScope(const std::function& fn) + : m_fn(fn) + {} + ~CScope() { + m_fn(); + } +}; + +const size_t StackSize = 100; +using TStack = void*[StackSize]; +size_t GetStack(TStack& stack); +std::string GetStackString(); + +namespace sea { + void SetGlobalCrashHandler(); +} diff --git a/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp b/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp new file mode 100644 index 00000000000000..60a90747b6bbc0 --- /dev/null +++ b/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp @@ -0,0 +1,373 @@ 
+/********************************************************************************************************************************************************************************************************************************************************************************************* +# Intel® Single Event API +# +# This file is provided under the BSD 3-Clause license. +# Copyright (c) 2021, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +**********************************************************************************************************************************************************************************************************************************************************************************************/ + +#include "IttNotifyStdSrc.h" +#include "Utils.h" +#include +#include +#include +#include "jitprofiling.h" + +#define INTEL_LIBITTNOTIFY "INTEL_LIBITTNOTIFY" +#define INTEL_JIT_PROFILER "INTEL_JIT_PROFILER" + +#ifdef _WIN32 + #define setenv _putenv + #include + #include "IntelSEAPI.h" + #undef API_VERSION + #include + #pragma comment(lib, "dbghelp") +#else + #define setenv putenv + #define _strdup strdup +#endif + +#if (INTPTR_MAX == INT32_MAX) + #define BIT_SUFFIX "32" +#elif INTPTR_MAX == INT64_MAX + #define BIT_SUFFIX "64" +#else + #error "Environment not 32 or 64-bit!" 
+#endif + +int GlobalInit() { + static const char var_name[] = INTEL_LIBITTNOTIFY BIT_SUFFIX; + static const char jit_var_name[] = INTEL_JIT_PROFILER BIT_SUFFIX; + sea::SModuleInfo mdlinfo = sea::Fn2Mdl((void*)GlobalInit); // NOLINT + + VerbosePrint("IntelSEAPI: %s=%s | Loaded from: %s\n", var_name, get_environ_value(var_name).c_str(), mdlinfo.path.c_str()); + + std::string value = var_name; + value += "="; + value += mdlinfo.path; + std::string jit_val = jit_var_name; + jit_val += "=" + mdlinfo.path; + + setenv(_strdup(value.c_str())); + VerbosePrint("IntelSEAPI: setting %s\n", value.c_str()); + setenv(_strdup(jit_val.c_str())); + VerbosePrint("IntelSEAPI: setting %s\n", jit_val.c_str()); + return 1; +} + +int nSetLib = GlobalInit(); + +void AtExit(); + +extern "C" { + extern __itt_global ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, _ittapi_global); +} + +bool g_bInitialized = false; + +__itt_global* GetITTGlobal() { + return &ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, _ittapi_global); +} + +void ChainGlobal(__itt_global* pNew) { + __itt_global* pCurrent = GetITTGlobal(); + while (pCurrent->next) { + if (pCurrent->next == pNew) //already chained + return; + pCurrent = pCurrent->next; + } + pCurrent->next = pNew; +} + +void UnchainGlobal(__itt_global* pOld) { + __itt_global* pCurrent = GetITTGlobal(); + while (pCurrent->next) { + if (pCurrent->next == pOld) { + pCurrent->next = pOld->next; //removing it from list + return; + } + pCurrent = pCurrent->next; + } +} + + + +#ifdef _WIN32 + #include + + #define FIX_STR(type, ptr, name) \ + if (!ptr->name##A) { \ + if (ptr->name##W) { \ + size_t len = lstrlenW((const wchar_t*)ptr->name##W); \ + char* dest = reinterpret_cast(malloc(len + 2)); \ + wcstombs_s(&len, dest, len + 1, (const wchar_t*)ptr->name##W, len + 1); \ + const_cast(ptr)->name##A = dest; \ + } else { \ + const_cast(ptr)->name##A = _strdup("null"); \ + } \ + } + +#else + #define FIX_STR(type, ptr, name) \ + if (!ptr->name##A) { \ + if (ptr->name##W) { \ + size_t len = wcslen((const wchar_t*)ptr->name##W); \ + char* dest = reinterpret_cast(malloc(len + 2)); \ + wcstombs(dest, (const wchar_t*)ptr->name##W, len + 1); \ + const_cast(ptr)->name##A = dest; \ + } else { \ + const_cast(ptr)->name##A = _strdup("null"); \ + } \ + } +#endif + +#define FIX_DOMAIN(ptr) FIX_STR(__itt_domain, ptr, name) +#define FIX_STRING(ptr) FIX_STR(__itt_string_handle, ptr, str) +#define FIX_COUNTER(ptr)\ + FIX_STR(__itt_counter_info_t, ptr, name);\ + FIX_STR(__itt_counter_info_t, ptr, domain);\ + sea::FixCounter(ptr); + + +void __itt_report_error(__itt_error_code, ...) 
{}
+
+
+__itt_domain* g_pIntelSEAPIDomain = nullptr;
+
+extern "C" {
+
+    SEA_EXPORT void ITTAPI __itt_api_init(__itt_global* pGlob, __itt_group_id id) {
+        if (!g_bInitialized) {
+            g_bInitialized = true;
+            sea::SetGlobalCrashHandler();
+
+            __itt_global* pGlobal = GetITTGlobal();
+            __itt_mutex_init(&pGlobal->mutex);
+            pGlobal->mutex_initialized = 1;
+            sea::CIttLocker locker;
+            using namespace sea;
+            g_pIntelSEAPIDomain = UNICODE_AGNOSTIC(domain_create)("IntelSEAPI");
+            __itt_api_init(pGlobal, id);
+            pGlobal->api_initialized = 1;
+        }
+        const char* procname = sea::GetProcessName(true);
+        sea::SModuleInfo mdlinfo = sea::Fn2Mdl(pGlob);
+        VerbosePrint("IntelSEAPI init is called from process '%s' at module '%s'\n", procname, mdlinfo.path.c_str());
+        if (GetITTGlobal() != pGlob)
+            ChainGlobal(pGlob);
+        sea::FillApiList(pGlob->api_list_ptr);
+        for (___itt_domain* pDomain = pGlob->domain_list; pDomain; pDomain = pDomain->next) {
+            FIX_DOMAIN(pDomain);
+            sea::InitDomain(pDomain);
+        }
+        for (__itt_string_handle* pStr = pGlob->string_list; pStr; pStr = pStr->next) {
+            FIX_STRING(pStr);
+            sea::ReportString(const_cast<__itt_string_handle *>(pStr));
+        }
+        // counter_list was not yet invented that time
+        if (pGlob->version_build > 20120000) {
+            for (__itt_counter_info_t* pCounter = pGlob->counter_list; pCounter; pCounter = pCounter->next) {
+                FIX_COUNTER(pCounter);
+                VerbosePrint("Fixed counter: %s | %s\n", pCounter->domainA, pCounter->nameA);
+            }
+        }
+        sea::ReportModule(pGlob);
+        static bool bInitialized = false;
+        if (!bInitialized) {
+            bInitialized = true;
+            sea::InitSEA();
+#ifdef _WIN32
+            EventRegisterIntelSEAPI();
+#endif
+            atexit(AtExit);
+        }
+    }
+
+    SEA_EXPORT void ITTAPI __itt_api_fini(__itt_global* pGlob) {
+        if (pGlob) {
+            UnchainGlobal(pGlob);
+            return;
+        }
+
+        if (!g_bInitialized) return;
+        g_bInitialized = false;
+
+        sea::FinitaLaComedia();
+#ifdef _WIN32
+        EventUnregisterIntelSEAPI();
+#endif
+    }
+
+}
+
+void AtExit() {
+    __itt_api_fini(nullptr);
+}
+
+extern "C" {
+#ifdef STANDARD_SOURCES
+    typedef bool(*receive_t)(uint64_t receiver, uint64_t time, uint16_t count, const stdsrc::uchar_t** names, const stdsrc::uchar_t** values, double progress);
+    typedef uint64_t(*get_receiver_t)(const stdsrc::uchar_t* provider, const stdsrc::uchar_t* opcode, const stdsrc::uchar_t* taskName);
+
+    SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_receiver, receive_t receive) {
+        STDSRC_CHECK_RET(file, false);
+        class Receiver : public stdsrc::Receiver {
+        protected:
+            uint64_t m_receiver = 0;
+            receive_t m_receive = nullptr;
+            stdsrc::Reader& m_reader;
+
+        public:
+            Receiver(stdsrc::Reader& reader, uint64_t receiver, receive_t receive)
+                : m_receiver(receiver)
+                , m_reader(reader)
+                , m_receive(receive) {}
+
+            virtual bool onEvent(uint64_t time, const stdsrc::CVariantTree& props) {
+                size_t size = props.get_bags().size();
+                std::vector<const stdsrc::uchar_t*> names(size), values(size);
+                std::vector<stdsrc::ustring> values_temp(size);
+                names.reserve(size);
+                values.reserve(size);
+                size_t i = 0;
+                for (const auto& pair : props.get_bags()) {
+                    const stdsrc::CVariantTree& prop = pair.second;
+                    const stdsrc::CVariant& name = prop.get_variant(stdsrc::bagname::Name);
+                    names[i] = name.is_empty() ? nullptr : name.get().c_str();
+                    const stdsrc::CVariant& value = prop.get_variant(stdsrc::bagname::Value);
+                    values[i] = value.is_empty() ? nullptr : value.as_str(values_temp[i]).c_str();
+                    ++i;
+                }
+                return m_receive(m_receiver, time, (uint16_t)size, size ? &names[0] : nullptr, size ? &values[0] : nullptr, m_reader.getProgress());
+            }
+        };
+
+        class Reader : public stdsrc::Reader {
+            get_receiver_t m_get_receiver = nullptr;
+            receive_t m_receive = nullptr;
+        public:
+            Reader(get_receiver_t get_receiver, receive_t receive)
+                : m_get_receiver(get_receiver)
+                , m_receive(receive) {}
+            virtual stdsrc::Receiver::Ptr getReceiver(const stdsrc::ustring& provider, const stdsrc::ustring& opcode, const stdsrc::ustring& taskName,
+                                                      stdsrc::CVariantTree& props) {
+                uint64_t receiver = m_get_receiver(provider.c_str(), opcode.c_str(), taskName.c_str());
+                if (!receiver) return nullptr;
+                return std::make_shared<Receiver>(*this, receiver, m_receive);
+            }
+        };
+        Reader reader(get_receiver, receive);
+        std::string path(file);
+#ifdef _WIN32
+        if (path.substr(path.size() - 4) == ".etl")
+            return stdsrc::readETLFile(reader, file, stdsrc::etuRaw);
+#endif
+        return false;
+    }
+#endif
+
+#ifdef _WIN32
+    SEA_EXPORT const char* resolve_pointer(const char* szModulePath, uint64_t addr) {
+        static std::string res;
+        res.clear();
+        static HANDLE hCurProc = GetCurrentProcess();
+        DWORD dwOptions = SymSetOptions((SymGetOptions() |
+                                         SYMOPT_LOAD_LINES |
+                                         SYMOPT_UNDNAME |
+                                         SYMOPT_INCLUDE_32BIT_MODULES |
+                                         SYMOPT_ALLOW_ABSOLUTE_SYMBOLS) & ~SYMOPT_DEFERRED_LOADS);
+        static BOOL bInitialize = SymInitialize(hCurProc, NULL, TRUE);
+        if (!bInitialize) return nullptr;
+        static std::map<std::string, uint64_t> modules;
+        uint64_t module = 0;
+        if (modules.count(szModulePath)) {
+            module = modules[szModulePath];
+        } else {
+            module = SymLoadModule64(hCurProc, NULL, szModulePath, NULL, 0x800000, 0);
+            modules[szModulePath] = module;
+        }
+        if (!module) return nullptr;
+        IMAGEHLP_LINE64 line = { sizeof(IMAGEHLP_LINE64) };
+        DWORD dwDisplacement = 0;
+        SymGetLineFromAddr64(hCurProc, module + addr, &dwDisplacement, &line);
+        if (line.FileName) {
+            res += std::string(line.FileName) + "(" + std::to_string(line.LineNumber) + ")\n";
+        }
+
+        char buff[sizeof(SYMBOL_INFO) + 1024] = {};
+        SYMBOL_INFO * symbol = (SYMBOL_INFO*)buff; // NOLINT
+        symbol->MaxNameLen = 255;
+        symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
+        SymFromAddr(hCurProc, module + addr, nullptr, symbol);
+        res += symbol->Name;
+        return res.c_str();
+    }
+
+    SEA_EXPORT int NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData) {
+        iJIT_Method_Load* methodData = (iJIT_Method_Load*)EventSpecificData; // NOLINT
+
+        switch (event_type) {
+            case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED:
+            {
+                sea::WriteJit(&(uint32_t)methodData->method_id, sizeof(uint32_t)); // NOLINT
+                sea::WriteJit(&methodData->method_load_address, sizeof(void*)); // NOLINT
+                sea::WriteJit(&(uint32_t)methodData->method_size, sizeof(uint32_t)); // NOLINT
+                sea::WriteJit(&(uint32_t)methodData->line_number_size, sizeof(uint32_t)); // NOLINT
+                for (unsigned int i = 0; i < methodData->line_number_size; ++i) {
+                    const LineNumberInfo& lni = methodData->line_number_table[i];
+                    sea::WriteJit(&(uint32_t)lni.Offset, sizeof(uint32_t)); // NOLINT
+                    sea::WriteJit(&(uint32_t)lni.LineNumber, sizeof(uint32_t)); // NOLINT
+                }
+
+                const char * strings[] = { methodData->method_name, methodData->class_file_name, methodData->source_file_name };
+                for (size_t i = 0; i < sizeof(strings) / sizeof(strings[0]); ++i) {
+                    const char * str = strings[i] ? strings[i] : "";
+                    uint16_t len = (uint16_t)strlen(str);
+                    sea::WriteJit(&len, sizeof(len));
+                    sea::WriteJit(str, len);
+                }
+                break;
+            }
+            default:
+                break;
+        }
+
+        return 0;
+    }
+
+    SEA_EXPORT int Initialize() {
+        __itt_api_init(GetITTGlobal(), __itt_group_none);
+        sea::InitJit();
+
+        return 1;
+    }
+#endif
+}
+
+#if defined(STANDARD_SOURCES) && defined(_DEBUG) && 0
+
+bool receive(uint64_t, uint64_t time, uint16_t count, const stdsrc::uchar_t** names, const stdsrc::uchar_t** values, double progress) {
+    return true;
+}
+
+uint64_t get_receiver(const stdsrc::uchar_t* provider, const stdsrc::uchar_t* opcode, const stdsrc::uchar_t* taskName) {
+    return (uint64_t)&receive;
+}
+
+void Test() {
+    parse_standard_source(R"(d:\temp\SteamVR\Merged.etl)", get_receiver, receive);
+}
+
+#endif
+
diff --git a/thirdparty/ittapi/CMakeLists.txt b/thirdparty/ittapi/CMakeLists.txt
new file mode 100644
index 00000000000000..f89bb0c0b1c3d6
--- /dev/null
+++ b/thirdparty/ittapi/CMakeLists.txt
@@ -0,0 +1,46 @@
+# ******************************************************************************
+# Copyright 2017-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ******************************************************************************
+
+if(ENABLE_PROFILING_ITT)
+    if(DEFINED INTEL_VTUNE_DIR OR DEFINED ENV{INTEL_VTUNE_DIR})
+        find_package(ITT
+                     PATHS "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
+                     NO_DEFAULT_PATH)
+        if(NOT ITT_FOUND)
+            message(WARNING "Profiling option enabled, but no ITT library was found under INTEL_VTUNE_DIR")
+        endif()
+    else()
+        include(FetchContent)
+        FetchContent_Declare(
+            ext_ittapi
+            GIT_REPOSITORY https://github.com/intel/ittapi.git
+            GIT_TAG v3.18.6
+        )
+
+        FetchContent_GetProperties(ext_ittapi)
+        if(NOT ext_ittapi_POPULATED)
+            FetchContent_Populate(ext_ittapi)
+            add_subdirectory(${ext_ittapi_SOURCE_DIR} ${ext_ittapi_BINARY_DIR})
+        endif()
+
+        target_compile_definitions(ittnotify INTERFACE ENABLE_PROFILING_ITT)
+        if (UNIX)
+            target_compile_options(ittnotify PRIVATE -Wno-undef)
+        endif()
+
+        openvino_developer_export_targets(COMPONENT openvino_common TARGETS ittnotify)
+    endif()
+endif()
diff --git a/tools/benchmark/README.md b/tools/benchmark/README.md
index 0d9e62bc44a889..28fd4b70933e2f 100644
--- a/tools/benchmark/README.md
+++ b/tools/benchmark/README.md
@@ -146,7 +146,7 @@ If a model has only image input(s), please provide a folder with images or a path to an image as input.
 If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input.
 If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one.
 
-To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+To run the tool, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
 
 > **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
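
As a rough illustration of what the `ittnotify` target added above enables (not part of this patch), the sketch below shows a typical ittnotify instrumentation sequence whose events are delivered, via `__itt_api_init`, to collectors such as the `sea_itt_lib` library above or Intel VTune. The domain and task names are made up for the example; the consumer is assumed to link against `ittnotify` with `ENABLE_PROFILING_ITT` defined.

```cpp
// Hypothetical consumer of the ittnotify target (illustrative only).
#include <ittnotify.h>

int main() {
    // Create a named domain and a reusable string handle for the task name.
    __itt_domain* domain = __itt_domain_create("MyApp.Profiling");
    __itt_string_handle* task = __itt_string_handle_create("Inference");

    // Mark the region of interest; a registered collector records begin/end timestamps.
    __itt_task_begin(domain, __itt_null, __itt_null, task);
    // ... workload to be profiled ...
    __itt_task_end(domain);
    return 0;
}
```

If no collector is attached at run time, these calls reduce to cheap no-ops, which is why the instrumentation can stay compiled into release binaries.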