Commit
Merge remote-tracking branch 'upstream/master' into rebase11
nosovmik committed May 31, 2021
2 parents 6c9e7a8 + 4918533 commit edd6027
Showing 716 changed files with 16,168 additions and 6,235 deletions.
2 changes: 2 additions & 0 deletions .ci/azure/linux.yml
@@ -107,6 +107,8 @@ jobs:
-DENABLE_PYTHON=ON
-DPYTHON_EXECUTABLE=/usr/bin/python3.6
-DENABLE_TESTS=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_EDITOR_ENABLE=ON
-DENABLE_FASTER_BUILD=ON
-DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
$(REPO_DIR)
2 changes: 1 addition & 1 deletion .ci/azure/mac.yml
@@ -105,7 +105,7 @@ jobs:
workingDirectory: $(BUILD_DIR)
displayName: 'Install'

- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru*:IE_CPU.exp_* --gtest_output=xml:TEST-NGraphUT.xml
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false

1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -114,6 +114,7 @@ function(build_ngraph)
endif()

ie_cpack_add_component(ngraph REQUIRED)
ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph)

set(SDL_cmake_included ON)
add_subdirectory(ngraph)
1 change: 1 addition & 0 deletions Jenkinsfile
@@ -13,6 +13,7 @@ properties([
name: 'library_version')
])
])

loadOpenVinoLibrary {
entrypoint(this)
}
3 changes: 0 additions & 3 deletions cmake/developer_package/IEDevScriptsConfig.cmake
@@ -53,9 +53,6 @@ function(set_temp_directory temp_variable source_tree_dir)
if (DEFINED ENV{DL_SDK_TEMP} AND NOT $ENV{DL_SDK_TEMP} STREQUAL "")
message(STATUS "DL_SDK_TEMP environment is set : $ENV{DL_SDK_TEMP}")
file(TO_CMAKE_PATH $ENV{DL_SDK_TEMP} temp)
if (ENABLE_ALTERNATIVE_TEMP)
set(ALTERNATIVE_PATH ${source_tree_dir}/temp)
endif()
else ()
set(temp ${source_tree_dir}/temp)
endif()
6 changes: 5 additions & 1 deletion cmake/developer_package/add_ie_target.cmake
@@ -31,6 +31,7 @@ addIeTarget(
function(addIeTarget)
set(options
ADD_CPPLINT # Enables code style checks for the target
ADD_CLANG_FORMAT # Enables clang-format code style checks for the target
)
set(oneValueRequiredArgs
TYPE # type of target, SHARED|STATIC|EXECUTABLE. SHARED and STATIC correspond to add_library, EXECUTABLE to add_executable
@@ -119,6 +120,10 @@ function(addIeTarget)
# code style
add_cpplint_target(${ARG_NAME}_cpplint FOR_TARGETS ${ARG_NAME})
endif()
if (ARG_ADD_CLANG_FORMAT)
# code style
add_clang_format_target(${ARG_NAME}_clang FOR_TARGETS ${ARG_NAME})
endif()
if (ARG_DEVELOPER_PACKAGE)
# developer package
openvino_developer_export_targets(COMPONENT ${ARG_DEVELOPER_PACKAGE}
@@ -128,7 +133,6 @@
# Provide default compile pdb name equal to target name
set_target_properties(${ARG_NAME} PROPERTIES COMPILE_PDB_NAME ${ARG_NAME})
endif()

endfunction()

#[[
@@ -175,7 +175,7 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
#path exists, so we would like to check what was unpacked version
set (version_file ${unpacked_path}/ie_dependency.info)

if (NOT EXISTS ${version_file} AND NOT ${ENABLE_ALTERNATIVE_TEMP})
if (NOT EXISTS ${version_file})
clean_message(FATAL_ERROR "error: Dependency doesn't contain version file. Please select actions: \n"
"if you are not sure about your FS dependency - remove it : \n"
"\trm -rf ${unpacked_path}\n"
2 changes: 0 additions & 2 deletions cmake/developer_package/features.cmake
@@ -56,8 +56,6 @@ ie_option (VERBOSE_BUILD "shows extra information about build" OFF)

ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)

ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" OFF)

ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF)

#
11 changes: 9 additions & 2 deletions cmake/developer_package/plugins/plugins.cmake
@@ -27,7 +27,10 @@ endif()
# )
#
function(ie_add_plugin)
set(options SKIP_INSTALL)
set(options
SKIP_INSTALL
ADD_CLANG_FORMAT
)
set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR)
set(multiValueArgs SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
cmake_parse_arguments(IE_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -73,7 +76,11 @@ function(ie_add_plugin)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()

add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
if (IE_PLUGIN_ADD_CLANG_FORMAT)
add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME})
else()
add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
endif()

# check that plugin with such name is not registered

4 changes: 0 additions & 4 deletions cmake/toolchains/ia32.linux.toolchain.cmake
@@ -19,7 +19,3 @@ endmacro()

# need libusb 32-bits version
_set_if_not_defined(ENABLE_VPU OFF)

# fix conversion from uint64_t / int64_t to size_t
_set_if_not_defined(NGRAPH_ONNX_IMPORT_ENABLE OFF)
_set_if_not_defined(NGRAPH_ONNX_EDITOR_ENABLE OFF)
25 changes: 25 additions & 0 deletions docs/.clang-format
@@ -0,0 +1,25 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never

Language: Cpp
Standard: Cpp11

AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
2 changes: 2 additions & 0 deletions docs/IE_DG/supported_plugins/CL_DNN.md
@@ -117,6 +117,8 @@ When specifying key values as raw strings (that is, when using Python API), omit
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies the number of GPU "execution" streams for the throughput mode (an upper bound for the number of inference requests that can be executed simultaneously).<br>This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates the bare minimum of streams to improve performance; this is the most portable option if you are not sure how many resources your target machine has (and what the optimal number of streams would be). <br> A positive integer value creates the requested number of streams. |
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used by the clDNN engine, e.g., for JIT compilation of clDNN kernels or clDNN CPU kernel processing. The default value is the maximum number of threads available in the host environment, to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum number of available threads or less than zero, it is set to the maximum number of available threads. It can be set to a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while the clDNN plugin is running. Note that setting this value to a lower number will affect not only the network loading time but also the CPU layers of clDNN networks that are optimized with multi-threading. |
| `KEY_CLDNN_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with a fixed iteration count to be unrolled. It is turned on by default. Turning this key on achieves better inference performance for loops with a small iteration count (less than 16, as a rule of thumb). Turning this key off achieves better performance for both graph loading time and inference time for loops with many iterations (greater than 16). Note that turning this key on increases the graph loading time in proportion to the iteration count; thus, this key should be turned off if graph loading time is the most important target to optimize. |
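
For illustration, a minimal C++ sketch of passing such keys to the GPU plugin follows; the model path and the chosen values are placeholders, not recommendations. Raw string keys correspond to the `KEY_*` names above with the `KEY_` prefix omitted.

```cpp
#include <inference_engine.hpp>

#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder IR path

    // Plugin configuration passed at LoadNetwork time.
    std::map<std::string, std::string> config = {
        {"GPU_THROUGHPUT_STREAMS", "2"},       // two parallel "execution" streams
        {"CLDNN_ENABLE_LOOP_UNROLLING", "NO"}  // favor graph loading time for long loops
    };

    auto executableNetwork = core.LoadNetwork(network, "GPU", config);
    return 0;
}
```

If needed, the applied values can be queried back with `ExecutableNetwork::GetConfig()`.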

## Note on Debug Capabilities of the GPU Plugin

The Inference Engine GPU plugin can dump user custom OpenCL&trade; kernels to a file, which allows you to properly debug compilation issues in your custom kernels.
4 changes: 2 additions & 2 deletions docs/IE_PLUGIN_DG/ExecutableNetwork.md
@@ -4,8 +4,8 @@
- Compile an InferenceEngine::ICNNNetwork instance to a backend specific graph representation
- Create an arbitrary number of `InferRequest` objects
- Hold some common resources shared between different instances of `InferRequest`. For example:
- InferenceEngine::ExecutableNetworkInternal::_taskExecutor task executor to implement asynchronous execution
- InferenceEngine::ExecutableNetworkInternal::_callbackExecutor task executor to run an asynchronous inference request callback in a separate thread
- InferenceEngine::IExecutableNetworkInternal::_taskExecutor task executor to implement asynchronous execution
- InferenceEngine::IExecutableNetworkInternal::_callbackExecutor task executor to run an asynchronous inference request callback in a separate thread

`ExecutableNetwork` Class
------------------------
12 changes: 6 additions & 6 deletions docs/IE_PLUGIN_DG/Plugin.md
@@ -21,7 +21,7 @@ To build an Inference Engine plugin with the Plugin API, see the [Inference Engi
Plugin Class
------------------------

Inference Engine Plugin API provides the helper InferenceEngine::InferencePluginInternal class recommended to use as a base class for a plugin.
Inference Engine Plugin API provides the helper InferenceEngine::IInferencePlugin class recommended to use as a base class for a plugin.
Based on that, declaration of a plugin class can look as follows:

@snippet src/template_plugin.hpp plugin:header
@@ -56,8 +56,8 @@ A plugin must define a device name enabled via the `_pluginName` field of a base

### `LoadExeNetworkImpl()`

**Implementation details:** The base InferenceEngine::InferencePluginInternal class provides a common implementation
of the public InferenceEngine::InferencePluginInternal::LoadNetwork method that calls plugin-specific `LoadExeNetworkImpl`, which is defined in a derived class.
**Implementation details:** The base InferenceEngine::IInferencePlugin class provides a common implementation
of the public InferenceEngine::IInferencePlugin::LoadNetwork method that calls plugin-specific `LoadExeNetworkImpl`, which is defined in a derived class.

This is the most important function of the `Plugin` class and creates an instance of compiled `ExecutableNetwork`,
which holds a backend-dependent compiled graph in an internal representation:
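
For illustration only, a minimal sketch of a derived plugin class that overrides this method could look as follows; the header path, namespace, and exact signatures are assumptions, so treat the template plugin snippets referenced on this page and the Plugin API headers as the source of truth:

```cpp
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>

#include <istream>
#include <map>
#include <memory>
#include <string>

namespace TemplatePlugin {

class Plugin : public InferenceEngine::IInferencePlugin {
public:
    // Called by the base IInferencePlugin::LoadNetwork: compiles `network`
    // into a backend-specific executable network.
    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> LoadExeNetworkImpl(
        const InferenceEngine::CNNNetwork& network,
        const std::map<std::string, std::string>& config) override;

    // Counterpart for the import path described below.
    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> ImportNetworkImpl(
        std::istream& model,
        const std::map<std::string, std::string>& config) override;
};

}  // namespace TemplatePlugin
```
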
@@ -166,10 +166,10 @@ using an [ExecutableNetwork](@ref executable_network) object. This functionality
backend specific graph compilation takes significant time and/or cannot be done on a target host
device due to other reasons.

**Implementation details:** The base plugin class InferenceEngine::InferencePluginInternal implements InferenceEngine::InferencePluginInternal::ImportNetwork
as follows: exports a device type (InferenceEngine::InferencePluginInternal::_pluginName) and then calls `ImportNetworkImpl`,
**Implementation details:** The base plugin class InferenceEngine::IInferencePlugin implements InferenceEngine::IInferencePlugin::ImportNetwork
as follows: exports a device type (InferenceEngine::IInferencePlugin::_pluginName) and then calls `ImportNetworkImpl`,
which is implemented in a derived class.
If a plugin cannot use the base implementation InferenceEngine::InferencePluginInternal::ImportNetwork, it can override base
If a plugin cannot use the base implementation InferenceEngine::IInferencePlugin::ImportNetwork, it can override base
implementation and define an output blob structure up to its needs. This
can be useful if a plugin exports a blob in a special format for integration with other frameworks
where a common Inference Engine header from a base class implementation is not appropriate.
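
A minimal sketch of such an override might look like the following; the stream-based signature and the class name continue the hypothetical `TemplatePlugin::Plugin` declaration sketched earlier and are assumptions rather than the template plugin's actual code:

```cpp
// Sketch only: rebuild a compiled network from a blob previously written by Export().
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> TemplatePlugin::Plugin::ImportNetworkImpl(
    std::istream& model,
    const std::map<std::string, std::string>& config) {
    // 1. Deserialize the backend-specific blob (its layout is defined by the plugin itself).
    // 2. Wrap the result into the plugin's ExecutableNetwork implementation and return it.
    // Both steps are backend specific and omitted here; a real plugin would not return nullptr.
    return nullptr;
}
```
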
4 changes: 2 additions & 2 deletions docs/MO_DG/img/compressed_int8_Convolution_weights.png
4 changes: 2 additions & 2 deletions docs/MO_DG/img/expanded_int8_Convolution_weights.png
@@ -2,36 +2,36 @@

## Introduction

Inference Engine CPU plugin can infer models in the 8-bit integer (INT8) precision.
For details, refer to [INT8 inference on the CPU](../../../IE_DG/Int8Inference.md).
Inference Engine CPU and GPU plugins can infer models in low precision.
For details, refer to [Low Precision Inference on the CPU](../../../IE_DG/Int8Inference.md).

Intermediate Representation (IR) should be specifically formed to be suitable for INT8 inference.
Such an IR is called an INT8 IR and you can generate it in two ways:
- [Quantize model with the Post-Training Optimization tool](@ref pot_README)
- Use the Model Optimizer for TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations)
Intermediate Representation (IR) should be specifically formed to be suitable for low precision inference.
Such an IR is called a Low Precision IR and you can generate it in two ways:
- [Quantize regular IR with the Post-Training Optimization tool](@ref pot_README)
- Use the Model Optimizer for a model pretrained for Low Precision inference: TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations) and ONNX\* quantized models.
Both TensorFlow and ONNX quantized models can be prepared with the [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md).

For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs with the `levels` attribute set to `255` or `256`.
For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs.
See the [specification of `FakeQuantize` operation](../../../ops/quantization/FakeQuantize_1.md) for details.
To see the list of supported INT8 layers, refer to [INT8 inference on the CPU](../../../IE_DG/Int8Inference.md).

To execute the `Convolution` operation in INT8 on CPU, both data and weight inputs should have `FakeQuantize` as an input operation:
![](../../img/expanded_int8_Convolution_weights.png)

INT8 IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between an INT8 IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the INT8 IR.
Plugins with INT8 inference support recognize these sub-graphs and quantize them during the inference time.
Plugins without INT8 support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision.
A Low Precision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and an FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR.
Plugins with Low Precision Inference support recognize these sub-graphs and quantize them at inference time.
Plugins without Low Precision support execute all operations, including `FakeQuantize`, as-is, in FP32 or FP16 precision.

Accordingly, the presence of FakeQuantize operations in the IR is a recommendation for a plugin on how to quantize particular operations in the model.
If capable, a plugin accepts the recommendation and performs INT8 inference, otherwise the plugin ignores the recommendation and executes a model in the floating-point precision.
If capable, a plugin accepts the recommendation and performs Low Precision Inference; otherwise, the plugin ignores the recommendation and executes the model in floating-point precision.

## Compressed INT8 Weights
## Compressed Low Precision Weights

Weighted operations, like `Convolution`, `MatMul`, and others, store weights as floating-point `Constant` in the graph followed by the `FakeQuantize` operation.
`Constant` followed by the `FakeQuantize` operation could be optimized memory-wise due to the `FakeQuantize` operation semantics.
The resulting weights sub-graph stores weights in INT8 `Constant`, which gets unpacked back to floating point with the `Convert` operation.
Weights compression leaves `FakeQuantize` output arithmetically the same and weights storing takes four times less memory.
The resulting weights sub-graph stores weights in Low Precision `Constant`, which gets unpacked back to floating point with the `Convert` operation.
Weights compression replaces `FakeQuantize` with optional `Subtract` and `Multiply` operations, leaving the output arithmetically the same while the stored weights take four times less memory.
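
As an illustrative formula (the symbol names are chosen here, not taken from the IR specification), the `Convert` → `Subtract` → `Multiply` chain restores each weight as

\f[ w_{\mathrm{restored}} = (\mathrm{Convert}(w_{\mathrm{low}}) - \mathrm{zero\_point}) \cdot \mathrm{scale} \f]

where `zero_point` is the optional `Subtract` constant and `scale` is the `Multiply` constant; when they match the original `FakeQuantize` parameters, the restored weights produce arithmetically the same output as the uncompressed ones.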

See the visualization of `Convolution` with the compressed weights:
![](../../img/compressed_int8_Convolution_weights.png)

Both Model Optimizer and Post-Training Optimization tool generate a compressed IR by default. To generate an expanded INT8 IR, use `--disable_weights_compression`.
Both Model Optimizer and Post-Training Optimization tool generate a compressed IR by default.
2 changes: 1 addition & 1 deletion docs/img/cpu_streams_explained.png
3 changes: 3 additions & 0 deletions docs/img/cpu_streams_explained_1.png
9 changes: 7 additions & 2 deletions docs/install_guides/installing-openvino-conda.md
@@ -19,8 +19,13 @@ This guide provides installation steps for Intel® Distribution of OpenVINO™ t

## Install the runtime package using the Anaconda* Package Manager

1. Set up the Anaconda* environment. 

1. Set up the Anaconda* environment: 
```sh
conda create --name py37 python=3.7
```
```sh
conda activate py37
```
2. Update conda to the latest version:
```sh
conda update --all
2 changes: 1 addition & 1 deletion docs/install_guides/installing-openvino-pip.md
@@ -5,7 +5,7 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
Intel® Distribution of OpenVINO™ Toolkit provides the following packages available for installation through the PyPI repository:

* Runtime package with the Inference Engine inside: [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/).
* Developers package that includes the runtime package as a dependency, Model Optimizer, Accuracy Checker and Post-Training Optimization Tool: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev).
* Developer package that includes the runtime package as a dependency, Model Optimizer and other developer tools: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev).

## Additional Resources
