Commit
Merge remote-tracking branch 'upstream/master' into rebase11
nosovmik committed May 31, 2021
2 parents 6c9e7a8 + 4918533 commit edd6027
Showing 716 changed files with 16,168 additions and 6,235 deletions.
2 changes: 2 additions & 0 deletions .ci/azure/linux.yml
@@ -107,6 +107,8 @@ jobs:
-DENABLE_PYTHON=ON
-DPYTHON_EXECUTABLE=/usr/bin/python3.6
-DENABLE_TESTS=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_EDITOR_ENABLE=ON
-DENABLE_FASTER_BUILD=ON
-DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
$(REPO_DIR)
2 changes: 1 addition & 1 deletion .ci/azure/mac.yml
@@ -105,7 +105,7 @@ jobs:
workingDirectory: $(BUILD_DIR)
displayName: 'Install'

- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru*:IE_CPU.exp_* --gtest_output=xml:TEST-NGraphUT.xml
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false

1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -114,6 +114,7 @@ function(build_ngraph)
endif()

ie_cpack_add_component(ngraph REQUIRED)
ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph)

set(SDL_cmake_included ON)
add_subdirectory(ngraph)
1 change: 1 addition & 0 deletions Jenkinsfile
@@ -13,6 +13,7 @@ properties([
name: 'library_version')
])
])

loadOpenVinoLibrary {
entrypoint(this)
}
3 changes: 0 additions & 3 deletions cmake/developer_package/IEDevScriptsConfig.cmake
@@ -53,9 +53,6 @@ function(set_temp_directory temp_variable source_tree_dir)
if (DEFINED ENV{DL_SDK_TEMP} AND NOT $ENV{DL_SDK_TEMP} STREQUAL "")
message(STATUS "DL_SDK_TEMP environment is set : $ENV{DL_SDK_TEMP}")
file(TO_CMAKE_PATH $ENV{DL_SDK_TEMP} temp)
if (ENABLE_ALTERNATIVE_TEMP)
set(ALTERNATIVE_PATH ${source_tree_dir}/temp)
endif()
else ()
set(temp ${source_tree_dir}/temp)
endif()
6 changes: 5 additions & 1 deletion cmake/developer_package/add_ie_target.cmake
@@ -31,6 +31,7 @@ addIeTarget(
function(addIeTarget)
set(options
ADD_CPPLINT # Enables code style checks for the target
ADD_CLANG_FORMAT # Enables clang-format code style checks for the target
)
set(oneValueRequiredArgs
TYPE # type of target, SHARED|STATIC|EXECUTABLE. SHARED and STATIC correspond to add_library, EXECUTABLE to add_executable
@@ -119,6 +120,10 @@ function(addIeTarget)
# code style
add_cpplint_target(${ARG_NAME}_cpplint FOR_TARGETS ${ARG_NAME})
endif()
if (ARG_ADD_CLANG_FORMAT)
# code style
add_clang_format_target(${ARG_NAME}_clang FOR_TARGETS ${ARG_NAME})
endif()
if (ARG_DEVELOPER_PACKAGE)
# developer package
openvino_developer_export_targets(COMPONENT ${ARG_DEVELOPER_PACKAGE}
@@ -128,7 +133,6 @@
# Provide default compile pdb name equal to target name
set_target_properties(${ARG_NAME} PROPERTIES COMPILE_PDB_NAME ${ARG_NAME})
endif()

endfunction()

#[[
@@ -175,7 +175,7 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
#path exists, so we would like to check what was unpacked version
set (version_file ${unpacked_path}/ie_dependency.info)

if (NOT EXISTS ${version_file} AND NOT ${ENABLE_ALTERNATIVE_TEMP})
if (NOT EXISTS ${version_file})
clean_message(FATAL_ERROR "error: Dependency doesn't contain version file. Please select actions: \n"
"if you are not sure about your FS dependency - remove it : \n"
"\trm -rf ${unpacked_path}\n"
2 changes: 0 additions & 2 deletions cmake/developer_package/features.cmake
@@ -56,8 +56,6 @@ ie_option (VERBOSE_BUILD "shows extra information about build" OFF)

ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)

ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" OFF)

ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF)

#
11 changes: 9 additions & 2 deletions cmake/developer_package/plugins/plugins.cmake
@@ -27,7 +27,10 @@ endif()
# )
#
function(ie_add_plugin)
set(options SKIP_INSTALL)
set(options
SKIP_INSTALL
ADD_CLANG_FORMAT
)
set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR)
set(multiValueArgs SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
cmake_parse_arguments(IE_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -73,7 +76,11 @@ function(ie_add_plugin)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()

add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
if (IE_PLUGIN_ADD_CLANG_FORMAT)
add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME})
else()
add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
endif()

# check that plugin with such name is not registered

4 changes: 0 additions & 4 deletions cmake/toolchains/ia32.linux.toolchain.cmake
@@ -19,7 +19,3 @@ endmacro()

# need libusb 32-bits version
_set_if_not_defined(ENABLE_VPU OFF)

# fix conversion from uint64_t / int64_t to size_t
_set_if_not_defined(NGRAPH_ONNX_IMPORT_ENABLE OFF)
_set_if_not_defined(NGRAPH_ONNX_EDITOR_ENABLE OFF)
25 changes: 25 additions & 0 deletions docs/.clang-format
@@ -0,0 +1,25 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never

Language: Cpp
Standard: Cpp11

AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
2 changes: 2 additions & 0 deletions docs/IE_DG/supported_plugins/CL_DNN.md
@@ -117,6 +117,8 @@ When specifying key values as raw strings (that is, when using Python API), omit
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies the number of GPU "execution" streams for the throughput mode (an upper bound for the number of inference requests that can be executed simultaneously).<br>This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates the bare minimum of streams to improve performance; this is the most portable option if you are not sure how many resources your target machine has (and what the optimal number of streams would be). <br> A positive integer value creates the requested number of streams. |
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used by the clDNN engine, e.g., for JIT compilation of clDNN kernels or clDNN CPU kernel processing. The default value is the maximum number of threads available in the host environment, to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum number of available threads or less than zero, it is set to the maximum number of available threads. It can be set to a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while the clDNN plugin is running. Note that setting this value to a lower number will affect not only the network loading time but also the CPU layers of clDNN networks that are optimized with multi-threading. |
| `KEY_CLDNN_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with a fixed iteration count to be unrolled. It is turned on by default. Turning this key on achieves better inference performance for loops with a small iteration count (less than 16, as a rule of thumb). Turning this key off achieves better performance for both graph loading time and inference time for loops with many iterations (greater than 16). Note that turning this key on increases the graph loading time in proportion to the iteration count; thus, this key should be turned off if graph loading time is the most important target to optimize. |
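
For illustration, a minimal C++ sketch of passing such keys to the GPU plugin follows; the model path and the chosen values are placeholders, not recommendations. Raw string keys correspond to the `KEY_*` names above with the `KEY_` prefix omitted.

```cpp
#include <inference_engine.hpp>

#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder IR path

    // Plugin configuration passed at LoadNetwork time.
    std::map<std::string, std::string> config = {
        {"GPU_THROUGHPUT_STREAMS", "2"},       // two parallel "execution" streams
        {"CLDNN_ENABLE_LOOP_UNROLLING", "NO"}  // favor graph loading time for long loops
    };

    auto executableNetwork = core.LoadNetwork(network, "GPU", config);
    return 0;
}
```

If needed, the applied values can be queried back with `ExecutableNetwork::GetConfig()`.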

## Note on Debug Capabilities of the GPU Plugin

The Inference Engine GPU plugin can dump user custom OpenCL&trade; kernels to a file, which allows you to properly debug compilation issues in your custom kernels.
4 changes: 2 additions & 2 deletions docs/IE_PLUGIN_DG/ExecutableNetwork.md
@@ -4,8 +4,8 @@
- Compile an InferenceEngine::ICNNNetwork instance to a backend specific graph representation
- Create an arbitrary number of `InferRequest` objects
- Hold some common resources shared between different instances of `InferRequest`. For example:
- InferenceEngine::ExecutableNetworkInternal::_taskExecutor task executor to implement asynchronous execution
- InferenceEngine::ExecutableNetworkInternal::_callbackExecutor task executor to run an asynchronous inference request callback in a separate thread
- InferenceEngine::IExecutableNetworkInternal::_taskExecutor task executor to implement asynchronous execution
- InferenceEngine::IExecutableNetworkInternal::_callbackExecutor task executor to run an asynchronous inference request callback in a separate thread

`ExecutableNetwork` Class
------------------------
12 changes: 6 additions & 6 deletions docs/IE_PLUGIN_DG/Plugin.md
@@ -21,7 +21,7 @@ To build an Inference Engine plugin with the Plugin API, see the [Inference Engi
Plugin Class
------------------------

Inference Engine Plugin API provides the helper InferenceEngine::InferencePluginInternal class recommended to use as a base class for a plugin.
Inference Engine Plugin API provides the helper InferenceEngine::IInferencePlugin class recommended to use as a base class for a plugin.
Based on that, declaration of a plugin class can look as follows:

@snippet src/template_plugin.hpp plugin:header
@@ -56,8 +56,8 @@ A plugin must define a device name enabled via the `_pluginName` field of a base

### `LoadExeNetworkImpl()`

**Implementation details:** The base InferenceEngine::InferencePluginInternal class provides a common implementation
of the public InferenceEngine::InferencePluginInternal::LoadNetwork method that calls plugin-specific `LoadExeNetworkImpl`, which is defined in a derived class.
**Implementation details:** The base InferenceEngine::IInferencePlugin class provides a common implementation
of the public InferenceEngine::IInferencePlugin::LoadNetwork method that calls plugin-specific `LoadExeNetworkImpl`, which is defined in a derived class.

This is the most important function of the `Plugin` class and creates an instance of compiled `ExecutableNetwork`,
which holds a backend-dependent compiled graph in an internal representation:
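
For illustration only, a minimal sketch of a derived plugin class that overrides this method could look as follows; the header path, namespace, and exact signatures are assumptions, so treat the template plugin snippets referenced on this page and the Plugin API headers as the source of truth:

```cpp
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>

#include <istream>
#include <map>
#include <memory>
#include <string>

namespace TemplatePlugin {

class Plugin : public InferenceEngine::IInferencePlugin {
public:
    // Called by the base IInferencePlugin::LoadNetwork: compiles `network`
    // into a backend-specific executable network.
    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> LoadExeNetworkImpl(
        const InferenceEngine::CNNNetwork& network,
        const std::map<std::string, std::string>& config) override;

    // Counterpart for the import path described below.
    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> ImportNetworkImpl(
        std::istream& model,
        const std::map<std::string, std::string>& config) override;
};

}  // namespace TemplatePlugin
```
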
@@ -166,10 +166,10 @@ using an [ExecutableNetwork](@ref executable_network) object. This functionality
backend specific graph compilation takes significant time and/or cannot be done on a target host
device due to other reasons.

**Implementation details:** The base plugin class InferenceEngine::InferencePluginInternal implements InferenceEngine::InferencePluginInternal::ImportNetwork
as follows: exports a device type (InferenceEngine::InferencePluginInternal::_pluginName) and then calls `ImportNetworkImpl`,
**Implementation details:** The base plugin class InferenceEngine::IInferencePlugin implements InferenceEngine::IInferencePlugin::ImportNetwork
as follows: exports a device type (InferenceEngine::IInferencePlugin::_pluginName) and then calls `ImportNetworkImpl`,
which is implemented in a derived class.
If a plugin cannot use the base implementation InferenceEngine::InferencePluginInternal::ImportNetwork, it can override base
If a plugin cannot use the base implementation InferenceEngine::IInferencePlugin::ImportNetwork, it can override base
implementation and define an output blob structure up to its needs. This
can be useful if a plugin exports a blob in a special format for integration with other frameworks
where a common Inference Engine header from a base class implementation is not appropriate.
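
A minimal sketch of such an override might look like the following; the stream-based signature and the class name continue the hypothetical `TemplatePlugin::Plugin` declaration sketched earlier and are assumptions rather than the template plugin's actual code:

```cpp
// Sketch only: rebuild a compiled network from a blob previously written by Export().
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> TemplatePlugin::Plugin::ImportNetworkImpl(
    std::istream& model,
    const std::map<std::string, std::string>& config) {
    // 1. Deserialize the backend-specific blob (its layout is defined by the plugin itself).
    // 2. Wrap the result into the plugin's ExecutableNetwork implementation and return it.
    // Both steps are backend specific and omitted here; a real plugin would not return nullptr.
    return nullptr;
}
```
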
4 changes: 2 additions & 2 deletions docs/MO_DG/img/compressed_int8_Convolution_weights.png
4 changes: 2 additions & 2 deletions docs/MO_DG/img/expanded_int8_Convolution_weights.png
@@ -2,36 +2,36 @@

## Introduction

Inference Engine CPU plugin can infer models in the 8-bit integer (INT8) precision.
For details, refer to [INT8 inference on the CPU](../../../IE_DG/Int8Inference.md).
Inference Engine CPU and GPU plugins can infer models in low precision.
For details, refer to [Low Precision Inference on the CPU](../../../IE_DG/Int8Inference.md).

Intermediate Representation (IR) should be specifically formed to be suitable for INT8 inference.
Such an IR is called an INT8 IR and you can generate it in two ways:
- [Quantize model with the Post-Training Optimization tool](@ref pot_README)
- Use the Model Optimizer for TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations)
Intermediate Representation (IR) should be specifically formed to be suitable for low precision inference.
Such an IR is called a Low Precision IR and you can generate it in two ways:
- [Quantize regular IR with the Post-Training Optimization tool](@ref pot_README)
- Use the Model Optimizer for a model pretrained for Low Precision inference: TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations) and ONNX\* quantized models.
Both TensorFlow and ONNX quantized models can be prepared with the [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md).

For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs with the `levels` attribute set to `255` or `256`.
For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs.
See the [specification of `FakeQuantize` operation](../../../ops/quantization/FakeQuantize_1.md) for details.
To see the list of supported INT8 layers, refer to [INT8 inference on the CPU](../../../IE_DG/Int8Inference.md).

To execute the `Convolution` operation in INT8 on CPU, both data and weight inputs should have `FakeQuantize` as an input operation:
![](../../img/expanded_int8_Convolution_weights.png)

INT8 IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between an INT8 IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the INT8 IR.
Plugins with INT8 inference support recognize these sub-graphs and quantize them during the inference time.
Plugins without INT8 support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision.
A Low Precision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and an FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR.
Plugins with Low Precision Inference support recognize these sub-graphs and quantize them at inference time.
Plugins without Low Precision support execute all operations, including `FakeQuantize`, as-is, in FP32 or FP16 precision.

Accordingly, the presence of FakeQuantize operations in the IR is a recommendation for a plugin on how to quantize particular operations in the model.
If capable, a plugin accepts the recommendation and performs INT8 inference, otherwise the plugin ignores the recommendation and executes a model in the floating-point precision.
If capable, a plugin accepts the recommendation and performs Low Precision Inference; otherwise, the plugin ignores the recommendation and executes the model in floating-point precision.

## Compressed INT8 Weights
## Compressed Low Precision Weights

Weighted operations, like `Convolution`, `MatMul`, and others, store weights as floating-point `Constant` in the graph followed by the `FakeQuantize` operation.
`Constant` followed by the `FakeQuantize` operation could be optimized memory-wise due to the `FakeQuantize` operation semantics.
The resulting weights sub-graph stores weights in INT8 `Constant`, which gets unpacked back to floating point with the `Convert` operation.
Weights compression leaves `FakeQuantize` output arithmetically the same and weights storing takes four times less memory.
The resulting weights sub-graph stores weights in Low Precision `Constant`, which gets unpacked back to floating point with the `Convert` operation.
Weights compression replaces `FakeQuantize` with optional `Subtract` and `Multiply` operations, leaving the output arithmetically the same while the stored weights take four times less memory.
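
As an illustrative formula (the symbol names are chosen here, not taken from the IR specification), the `Convert` → `Subtract` → `Multiply` chain restores each weight as

\f[ w_{\mathrm{restored}} = (\mathrm{Convert}(w_{\mathrm{low}}) - \mathrm{zero\_point}) \cdot \mathrm{scale} \f]

where `zero_point` is the optional `Subtract` constant and `scale` is the `Multiply` constant; when they match the original `FakeQuantize` parameters, the restored weights produce arithmetically the same output as the uncompressed ones.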

See the visualization of `Convolution` with the compressed weights:
![](../../img/compressed_int8_Convolution_weights.png)

Both Model Optimizer and Post-Training Optimization tool generate a compressed IR by default. To generate an expanded INT8 IR, use `--disable_weights_compression`.
Both Model Optimizer and Post-Training Optimization tool generate a compressed IR by default.
2 changes: 1 addition & 1 deletion docs/img/cpu_streams_explained.png
3 changes: 3 additions & 0 deletions docs/img/cpu_streams_explained_1.png
9 changes: 7 additions & 2 deletions docs/install_guides/installing-openvino-conda.md
@@ -19,8 +19,13 @@ This guide provides installation steps for Intel® Distribution of OpenVINO™ t

## Install the runtime package using the Anaconda* Package Manager

1. Set up the Anaconda* environment. 

1. Set up the Anaconda* environment: 
```sh
conda create --name py37 python=3.7
```
```sh
conda activate py37
```
2. Update conda to the latest version:
```sh
conda update --all
2 changes: 1 addition & 1 deletion docs/install_guides/installing-openvino-pip.md
@@ -5,7 +5,7 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
Intel® Distribution of OpenVINO™ Toolkit provides the following packages available for installation through the PyPI repository:

* Runtime package with the Inference Engine inside: [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/).
* Developers package that includes the runtime package as a dependency, Model Optimizer, Accuracy Checker and Post-Training Optimization Tool: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev).
* Developer package that includes the runtime package as a dependency, Model Optimizer and other developer tools: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev).

## Additional Resources
