diff --git a/.ci/azure/ci_utils/onnxruntime/skip_tests b/.ci/azure/ci_utils/onnxruntime/skip_tests
new file mode 100644
index 00000000000000..475b79b4acac66
--- /dev/null
+++ b/.ci/azure/ci_utils/onnxruntime/skip_tests
@@ -0,0 +1,6 @@
+TransposeOpTest.NHWC2NCHW
+TransposeOpTest.NCHW2NHWC
+TransposeOpTest.TwoDim_int16
+GatherOpTest.Gather_axis1_indices2d_int16
+SoftmaxOperator.ThreeDimsAxis1
+SoftmaxOperator.ThreeDimsAxis0
diff --git a/.ci/azure/ci_utils/onnxruntime/version b/.ci/azure/ci_utils/onnxruntime/version
new file mode 100644
index 00000000000000..3abd49542da1e3
--- /dev/null
+++ b/.ci/azure/ci_utils/onnxruntime/version
@@ -0,0 +1 @@
+rel-1.7.1
diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml
new file mode 100644
index 00000000000000..37173a4560fcf9
--- /dev/null
+++ b/.ci/azure/linux_onnxruntime.yml
@@ -0,0 +1,156 @@
+jobs:
+- job: onnxruntime
+ timeoutInMinutes: 90
+
+ pool:
+ name: LIN_VMSS_VENV_ONNX_WU2
+
+ variables:
+ system.debug: true
+ VSTS_HTTP_RETRY: 5
+ VSTS_HTTP_TIMEOUT: 200
+ WORKERS_NUMBER: 8
+ BUILD_TYPE: Release
+ REPO_DIR: $(Build.Repository.LocalPath)
+ ONNXRUNTIME_REPO_DIR: $(REPO_DIR)/../onnxruntime
+ WORK_DIR: $(Pipeline.Workspace)/_w
+ MODELS_DIR: /mount/cinfsshare/onnxtestdata
+ TMP_DIR: /mnt/tmp
+ INSTALL_DIR: $(WORK_DIR)/install_pkg
+ BUILD_DIR: $(WORK_DIR)/build
+ ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime
+ ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build
+ steps:
+ - script: |
+ curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
+ whoami
+ uname -a
+ echo Python3 info ; which python3 ; python3 --version
+ echo Python info ; which python ; python --version
+ echo Java info ; which java ; java -version
+ echo gcc info ; which gcc ; gcc --version
+ lsb_release
+ env
+ cat /proc/cpuinfo
+ cat /proc/meminfo
+ cat /etc/fstab
+ vmstat -s
+ df
+ lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
+ free -h
+ displayName: 'System info'
+
+ - script: |
+ rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
+ sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
+ sudo mkdir -p $(MODELS_DIR)
+ sudo apt --assume-yes install nfs-common
+ sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
+ displayName: 'Make dirs'
+
+ - checkout: self
+ clean: true
+ lfs: false
+ submodules: recursive
+ path: openvino
+
+ - script: |
+ branch=`tr -s '\n ' < $(ONNXRUNTIME_UTILS)/version`
+ git clone --branch $branch --single-branch --recursive https://github.com/microsoft/onnxruntime.git $(ONNXRUNTIME_REPO_DIR)
+ displayName: 'Clone onnxruntime'
+
+ - script: |
+ sudo apt --assume-yes install libusb-1.0-0-dev
+ python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
+ # For running Python API tests
+ python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
+ # Speed up build
+ wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
+ unzip ninja-linux.zip
+ sudo cp -v ninja /usr/local/bin/
+ # Speed up tests
+ git clone https://github.com/google/gtest-parallel.git
+ workingDirectory: $(WORK_DIR)
+ displayName: 'Install dependencies'
+
+ - task: CMake@1
+ inputs:
+ # CMake must get Python 3.x version by default
+ cmakeArgs: >
+ -GNinja
+ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
+ -DENABLE_PYTHON=ON
+ -DPYTHON_EXECUTABLE=/usr/bin/python3.6
+ -DENABLE_VPU=OFF
+ -DENABLE_GNA=OFF
+ -DENABLE_OPENCV=OFF
+ -DENABLE_CPPLINT=OFF
+ -DENABLE_TESTS=OFF
+ -DENABLE_MKL_DNN=ON
+ -DENABLE_CLDNN=OFF
+ -DENABLE_PROFILING_ITT=OFF
+ -DENABLE_SAMPLES=OFF
+ -DENABLE_SPEECH_DEMO=OFF
+ -DENABLE_PYTHON=ON
+ -DNGRAPH_ONNX_IMPORT_ENABLE=ON
+ -DNGRAPH_ONNX_EDITOR_ENABLE=ON
+ -DNGRAPH_INTERPRETER_ENABLE=ON
+ -DNGRAPH_DEBUG_ENABLE=OFF
+ -DNGRAPH_DYNAMIC_COMPONENTS_ENABLE=ON
+ $(REPO_DIR)
+ workingDirectory: $(BUILD_DIR)
+
+ - script: ninja
+ workingDirectory: $(BUILD_DIR)
+ displayName: 'Build Lin'
+
+ - script: ls -alR $(REPO_DIR)/bin/
+ displayName: 'List files'
+
+ - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
+ workingDirectory: $(BUILD_DIR)
+ displayName: 'Install'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ echo "2021.2" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt
+ ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR)
+ workingDirectory: $(ONNXRUNTIME_REPO_DIR)
+ displayName: 'Build ONNX Runtime'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ skip_tests=`tr -s '\n ' ':' < $(ONNXRUNTIME_UTILS)/skip_tests`
+ ./onnxruntime_test_all --gtest_filter=-$skip_tests
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run onnxruntime_test_all'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ ./onnxruntime_shared_lib_test
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run onnxruntime_shared_lib_test'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ ./onnxruntime_global_thread_pools_test
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run onnxruntime_global_thread_pools_test'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ ./onnxruntime_api_tests_without_env
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run onnxruntime_api_tests_without_env'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ ./onnx_test_runner "$(ONNXRUNTIME_REPO_DIR)/cmake/external/onnx/onnx/backend/test/data/pytorch-converted"
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run pytorch-converted tests'
+
+ - script: |
+ source $(INSTALL_DIR)/bin/setupvars.sh
+ ./onnx_test_runner "$(ONNXRUNTIME_REPO_DIR)/cmake/external/onnx/onnx/backend/test/data/pytorch-operator"
+ workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo
+ displayName: 'Run pytorch-operator tests'
diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml
index e344f1d8243032..c805aff037c8aa 100644
--- a/.ci/azure/windows.yml
+++ b/.ci/azure/windows.yml
@@ -36,7 +36,7 @@ jobs:
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
- TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;$(IB_DIR);%PATH%
+ TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%
steps:
- script: |
diff --git a/.ci/azure/windows_conditional_compilation.yml b/.ci/azure/windows_conditional_compilation.yml
index e627030a36223e..7352f9c758c79e 100644
--- a/.ci/azure/windows_conditional_compilation.yml
+++ b/.ci/azure/windows_conditional_compilation.yml
@@ -24,7 +24,7 @@ jobs:
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
- TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;$(IB_DIR);%PATH%
+ TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%
steps:
- script: |
diff --git a/.ci/openvino-onnx/Dockerfile b/.ci/openvino-onnx/Dockerfile
index 6879c49182a8fb..ec78869b6d6585 100644
--- a/.ci/openvino-onnx/Dockerfile
+++ b/.ci/openvino-onnx/Dockerfile
@@ -1,6 +1,10 @@
FROM ubuntu:20.04
-LABEL version=2020.07.09.1
+LABEL version=2021.03.30.1
+
+# Build configuration arguments
+ARG BUILD_TYPE=Release
+ARG PROTOBUF_LITE=OFF
ARG http_proxy
ARG https_proxy
@@ -10,7 +14,6 @@ ENV https_proxy ${https_proxy}
ENV CI=true
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED 1
-ARG PROTOBUF_LITE=OFF
# Install base dependencies
RUN apt-get update && apt-get install -y locales && apt-get clean autoclean && apt-get autoremove -y
@@ -52,7 +55,7 @@ RUN apt-get update && apt-get -y --no-install-recommends install \
COPY . /openvino/
WORKDIR /openvino/build
RUN cmake .. \
- -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DENABLE_VPU=OFF \
-DENABLE_GNA=OFF \
-DENABLE_OPENCV=OFF \
@@ -66,6 +69,7 @@ RUN cmake .. \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_IMPORT_ENABLE=ON \
+ -DNGRAPH_ONNX_EDITOR_ENABLE=ON \
-DNGRAPH_INTERPRETER_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DNGRAPH_DYNAMIC_COMPONENTS_ENABLE=ON \
@@ -75,7 +79,7 @@ RUN make -j $(nproc) install
# Run tests via tox
WORKDIR /openvino/ngraph/python
-ENV NGRAPH_CPP_BUILD_PATH=/openvino/dist/deployment_tools/ngraph
+ENV ngraph_DIR=/openvino/dist/deployment_tools/ngraph
ENV LD_LIBRARY_PATH=/openvino/dist/deployment_tools/ngraph/lib
-ENV PYTHONPATH=/openvino/bin/intel64/Release/lib/python_api/python3.8:${PYTHONPATH}
+ENV PYTHONPATH=/openvino/bin/intel64/${BUILD_TYPE}/lib/python_api/python3.8:${PYTHONPATH}
CMD tox
diff --git a/.ci/openvino-onnx/Jenkinsfile b/.ci/openvino-onnx/Jenkinsfile
index de359f0b21d026..f157392d471892 100644
--- a/.ci/openvino-onnx/Jenkinsfile
+++ b/.ci/openvino-onnx/Jenkinsfile
@@ -1,12 +1,13 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
-DOCKER_CONTAINER_NAME= "openvino-onnx-ci-container"
-DOCKER_IMAGE_TAG = "openvino-onnx-ci-image"
+DOCKER_CONTAINER_PREFIX= "openvino-onnx-ci-container"
+DOCKER_IMAGE_PREFIX= "openvino-onnx-ci-image"
BACKEND_CONFIGURATIONS = [
- [ protobuf_lite : "ON" ],
- [ protobuf_lite : "OFF" ]
+ [ name: "Release", build_type: "Release", protobuf_lite : "OFF" ],
+ [ name: "Debug", build_type: "Debug", protobuf_lite : "OFF" ],
+ [ name: "Rel_Lite", build_type: "Release", protobuf_lite : "ON" ],
]
// workaround for aborting previous builds on PR update
@@ -77,7 +78,7 @@ def gitSubmoduleUpdate(String repository_name) {
git submodule init && git submodule update \
--init \
--no-fetch \
- --recursive
+ --recursive
"""
}
}
@@ -89,35 +90,59 @@ def prepare_repository() {
}
}
+def get_commit_id(){
+ dir("${WORKDIR}/PROJECT_NAME") {
+ GIT_COMMIT_ID = sh(returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim()
+ }
+}
+
def updateModels() {
sh """
./ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d ${HOME}/ONNX_CI/data -o
"""
}
-def buildDockerImage(String protobuf_lite="OFF") {
+def buildDockerImage(Map configuration) {
updateModels()
+ get_commit_id()
+ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_PREFIX}_${GIT_COMMIT_ID}_${configuration.name}".toLowerCase()
sh """
- docker build --tag=${DOCKER_IMAGE_TAG} --build-arg PROTOBUF_LITE=${protobuf_lite} \
+ docker build --tag=${DOCKER_IMAGE_TAG} \
+ --build-arg BUILD_TYPE=${configuration.build_type} \
+ --build-arg PROTOBUF_LITE=${configuration.protobuf_lite} \
--file=.ci/openvino-onnx/Dockerfile \
--build-arg http_proxy=http://proxy-chain.intel.com:911/ \
--build-arg https_proxy=http://proxy-chain.intel.com:912/ .
"""
}
-def runTests() {
- sh """
- docker run --name ${DOCKER_CONTAINER_NAME} \
- --volume ${HOME}/ONNX_CI/data/model_zoo:/root/.onnx/model_zoo \
- ${DOCKER_IMAGE_TAG}
- """
+
+def runTests(Map configuration) {
+ get_commit_id()
+ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_PREFIX}_${GIT_COMMIT_ID}_${configuration.name}".toLowerCase()
+ DOCKER_CONTAINER_NAME="${DOCKER_CONTAINER_PREFIX}_${GIT_COMMIT_ID}_${configuration.name}"
+ // Run only basic unit tests in Debug configuration
+ if (configuration.build_type == "Debug") {
+ sh """
+ docker run --name ${DOCKER_CONTAINER_NAME} ${DOCKER_IMAGE_TAG}
+ """
+ }
+
+ // Run unit-tests AND large model tests by default
+ else {
+ sh """
+ docker run --name ${DOCKER_CONTAINER_NAME} \
+ --volume ${HOME}/ONNX_CI/data/model_zoo:/root/.onnx/model_zoo \
+ ${DOCKER_IMAGE_TAG} /bin/bash -c "tox && tox -e zoo_models"
+ """
+ }
}
def getConfigurationsMap() {
def configurationsMap = [:]
for (backend in BACKEND_CONFIGURATIONS) {
def configuration = backend.clone()
- configuration.name = "protobuf-lite ${configuration.protobuf_lite}"
+ configuration.name = "${configuration.name}"
configurationsMap[configuration.name] = {
stage(configuration.name) { CONFIGURATION_WORKFLOW(configuration) }
}
@@ -143,12 +168,12 @@ CONFIGURATION_WORKFLOW = { configuration ->
}
stage("Prepare Docker environment") {
dir("${WORKDIR}") {
- buildDockerImage(configuration.protobuf_lite)
+ buildDockerImage(configuration)
}
}
stage("Run tests") {
timeout(time: 20, unit: 'MINUTES') {
- runTests()
+ runTests(configuration)
}
}
}
@@ -165,9 +190,11 @@ CONFIGURATION_WORKFLOW = { configuration ->
finally {
stage("Cleanup") {
deleteDir()
+ get_commit_id()
+ DOCKER_CONTAINER_NAME="${DOCKER_CONTAINER_PREFIX}_${GIT_COMMIT_ID}_${configuration.name}"
sh """
- docker image prune -f
docker rm -f ${DOCKER_CONTAINER_NAME}
+ docker image prune -f
"""
}
}
diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml
index 7dcb851466aae3..eb2ea91484e7ca 100644
--- a/.github/workflows/mo.yml
+++ b/.github/workflows/mo.yml
@@ -80,12 +80,22 @@ jobs:
python3 setup.py sdist bdist_wheel
working-directory: model-optimizer
- - name: Test
+ - name: Test package content
+ run: |
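+ # Generate a small test_wheel.py that checks every .py file listed in automation/package_BOM.txt is present in the package's SOURCES.txt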
+ echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py
+ echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py
+ echo "for name in ref:" | tee -a test_wheel.py
+ echo " if name.endswith('.py'):" | tee -a test_wheel.py
+ echo " assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py
+ python3 test_wheel.py
+ working-directory: model-optimizer
+
+ - name: Test conversion
run: |
wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz
tar -xf mobilenet_v1_1.0_224.tgz
python3 -m pip install model-optimizer/dist/*.whl
- python3 -c "import sys, subprocess, mo_tf; subprocess.run([sys.executable, mo_tf.__file__, '--input_model', 'mobilenet_v1_1.0_224_frozen.pb', '--input_shape', '[1,224,224,3]'], check=True)"
+ python3 -m mo --input_model mobilenet_v1_1.0_224_frozen.pb --input_shape "[1,224,224,3]"
- uses: actions/upload-artifact@v2
with:
diff --git a/README.md b/README.md
index b03a32b256b39b..7d54e9e8f9cd33 100644
--- a/README.md
+++ b/README.md
@@ -45,5 +45,4 @@ Please report questions, issues and suggestions using:
[Open Model Zoo]:https://github.com/opencv/open_model_zoo
[Inference Engine]:https://software.intel.com/en-us/articles/OpenVINO-InferEngine
[Model Optimizer]:https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
-[tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino
[nGraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
diff --git a/cmake/developer_package/clang_format/clang_format.cmake b/cmake/developer_package/clang_format/clang_format.cmake
index 1484e7038d789a..78114abeaf80aa 100644
--- a/cmake/developer_package/clang_format/clang_format.cmake
+++ b/cmake/developer_package/clang_format/clang_format.cmake
@@ -17,6 +17,9 @@ if (ENABLE_CLANG_FORMAT)
set(ENABLE_CLANG_FORMAT OFF)
endif()
endif()
+ else()
+ message(WARNING "Supported clang-format version is not found!")
+ set(ENABLE_CLANG_FORMAT OFF)
endif()
endif()
diff --git a/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake b/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake
index f7fe488366bc6a..610568e8bb1dd2 100644
--- a/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake
+++ b/cmake/developer_package/cross_compile/cross_compiled_disp_gen.cmake
@@ -10,7 +10,7 @@
# XARCH_FUNC_NAME -- name of function to dispatch
# XARCH_NAMESPACES -- full namespace used to keep ODR
# XARCH_DISP_FILE -- dispatcher file name to generate
-# XARCH_SET -- set of ARCH supported by dispatcher. space delimited
+# XARCH_SET -- set of ARCH supported by dispatcher. semicolon-delimited
#
# =================================================================
@@ -24,7 +24,6 @@ function(_generate_dispatcher)
_find_signature_in_file(${XARCH_API_HEADER} ${XARCH_FUNC_NAME} SIGNATURE)
_generate_call_line_from_signature("${SIGNATURE}" CALL_LINE)
- string(REPLACE " " ";" XARCH_SET "${XARCH_SET}")
string(REPLACE "::" ";" XARCH_NAMESPACES "${XARCH_NAMESPACES}")
list(GET XARCH_NAMESPACES -1 XARCH_CURRENT_NAMESPACE)
diff --git a/cmake/developer_package/cross_compile/cross_compiled_func.cmake b/cmake/developer_package/cross_compile/cross_compiled_func.cmake
index ed969a3869f29c..7d83b4dbd4a2d3 100644
--- a/cmake/developer_package/cross_compile/cross_compiled_func.cmake
+++ b/cmake/developer_package/cross_compile/cross_compiled_func.cmake
@@ -117,17 +117,21 @@ function(_clone_source_to_target TARGET SOURCE ARCH_SET)
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}
${CMAKE_CURRENT_BINARY_DIR}/${ARCH_SOURCE}
DEPENDS ${SOURCE}
+ VERBATIM
)
set(_ARCH_SPECIFIC_FLAGS
${_DEFINE_${_arch}}
${_FLAGS_${_arch}}
"-DXARCH=${_arch}" ## to replace XARCH with direct ARCH name
- "-I${CMAKE_CURRENT_SOURCE_DIR}/${ARCH_INCLUDE_DIR}" ## To make valid #include "some.hpp"
)
_add_source_compile_flags(${ARCH_SOURCE} ${_ARCH_SPECIFIC_FLAGS})
+ ## To make `#include "some.hpp"` valid
+ set_property(SOURCE ${ARCH_SOURCE} APPEND PROPERTY INCLUDE_DIRECTORIES
+ "${CMAKE_CURRENT_SOURCE_DIR}/${ARCH_INCLUDE_DIR}")
+
list(APPEND _ARCH_SOURCES ${ARCH_SOURCE})
endforeach()
@@ -146,25 +150,26 @@ function(_add_dispatcher_to_target TARGET HEADER FUNC_NAME NAMESPACE ARCH_SET)
set(DISPATCHER_SOURCE "cross-compiled/${DISPATCHER_NAME}_disp.cpp")
set(DISPATCHER_OPT_HOLDER "cross-compiled/${DISPATCHER_NAME}_holder.txt")
- set(_GEN_ARGS_LIST
- -DXARCH_FUNC_NAME="${X_NAME}"
- -DXARCH_NAMESPACES="${NAMESPACE}"
- -DXARCH_API_HEADER="${CMAKE_CURRENT_SOURCE_DIR}/${HEADER}"
- -DXARCH_DISP_FILE="${CMAKE_CURRENT_BINARY_DIR}/${DISPATCHER_SOURCE}"
- -DXARCH_SET="${ARCH_SET}"
- )
configure_file(${DISPATCHER_GEN_OPTIONS_HOLDER} ${DISPATCHER_OPT_HOLDER})
add_custom_command(
OUTPUT ${DISPATCHER_SOURCE}
- COMMAND ${CMAKE_COMMAND} ${_GEN_ARGS_LIST}
+ COMMAND ${CMAKE_COMMAND}
+ -D "XARCH_FUNC_NAME=${X_NAME}"
+ -D "XARCH_NAMESPACES=${NAMESPACE}"
+ -D "XARCH_API_HEADER=${CMAKE_CURRENT_SOURCE_DIR}/${HEADER}"
+ -D "XARCH_DISP_FILE=${CMAKE_CURRENT_BINARY_DIR}/${DISPATCHER_SOURCE}"
+ -D "XARCH_SET=${ARCH_SET}"
-P ${DISPATCHER_GEN_SCRIPT}
DEPENDS ${HEADER}
${DISPATCHER_GEN_SCRIPT}
${CMAKE_CURRENT_BINARY_DIR}/${DISPATCHER_OPT_HOLDER} ## Just to make run dependency on args value
+ VERBATIM
)
- _add_source_compile_flags(${DISPATCHER_SOURCE} "-I${DISPATCHER_INCLUDE_DIR}")
+ set_property(SOURCE ${DISPATCHER_SOURCE} APPEND PROPERTY INCLUDE_DIRECTORIES
+ "${CMAKE_CURRENT_SOURCE_DIR}/${DISPATCHER_INCLUDE_DIR}")
+
_add_source_to_target(${TARGET} ${DISPATCHER_SOURCE})
endfunction()
diff --git a/cmake/toolchains/ia32.linux.toolchain.cmake b/cmake/toolchains/ia32.linux.toolchain.cmake
index 675b0b3c4613ad..3c821a04f0b33f 100644
--- a/cmake/toolchains/ia32.linux.toolchain.cmake
+++ b/cmake/toolchains/ia32.linux.toolchain.cmake
@@ -22,3 +22,4 @@ _set_if_not_defined(ENABLE_VPU OFF)
# fix conversion from uint64_t / int64_t to size_t
_set_if_not_defined(NGRAPH_ONNX_IMPORT_ENABLE OFF)
+_set_if_not_defined(NGRAPH_ONNX_EDITOR_ENABLE OFF)
diff --git a/docs/IE_DG/Samples_Overview.md b/docs/IE_DG/Samples_Overview.md
index 1eeedca35b9f52..b59d5a576ae588 100644
--- a/docs/IE_DG/Samples_Overview.md
+++ b/docs/IE_DG/Samples_Overview.md
@@ -15,21 +15,25 @@ Inference Engine sample applications include the following:
- **Hello Classification Sample** – Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API. Input of any size and layout can be set to an infer request which will be pre-processed automatically during inference (the sample supports only images as inputs and supports Unicode paths).
- [Hello Classification C++ Sample](../../inference-engine/samples/hello_classification/README.md)
- [Hello Classification C Sample](../../inference-engine/ie_bridges/c/samples/hello_classification/README.md)
+ - [Hello Classification Python Sample](../../inference-engine/ie_bridges/python/sample/hello_classification/README.md)
- **Hello NV12 Input Classification Sample** – Input of any size and layout can be provided to an infer request. The sample transforms the input to the NV12 color format and pre-process it automatically during inference. The sample supports only images as inputs.
- [Hello NV12 Input Classification C++ Sample](../../inference-engine/samples/hello_nv12_input_classification/README.md)
- [Hello NV12 Input Classification C Sample](../../inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md)
- **Hello Query Device Sample** – Query of available Inference Engine devices and their metrics, configuration values.
- [Hello Query Device C++ Sample](../../inference-engine/samples/hello_query_device/README.md)
- [Hello Query Device Python* Sample](../../inference-engine/ie_bridges/python/sample/hello_query_device/README.md)
-- **[Hello Reshape SSD C++ Sample**](../../inference-engine/samples/hello_reshape_ssd/README.md)** – Inference of SSD networks resized by ShapeInfer API according to an input size.
+- **Hello Reshape SSD Sample** – Inference of SSD networks resized by ShapeInfer API according to an input size.
+ - [Hello Reshape SSD C++ Sample](../../inference-engine/samples/hello_reshape_ssd/README.md)
+ - [Hello Reshape SSD Python Sample](../../inference-engine/ie_bridges/python/sample/hello_reshape_ssd/README.md)
- **Image Classification Sample Async** – Inference of image classification networks like AlexNet and GoogLeNet using Asynchronous Inference Request API (the sample supports only images as inputs).
- [Image Classification C++ Sample Async](../../inference-engine/samples/classification_sample_async/README.md)
- [Image Classification Python* Sample Async](../../inference-engine/ie_bridges/python/sample/classification_sample_async/README.md)
-- **[Image Classification Python* Sample](../../inference-engine/ie_bridges/python/sample/hello_classification/README.md)** – Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API (the sample supports only images as inputs).
- **Neural Style Transfer Sample** – Style Transfer sample (the sample supports only images as inputs).
- [Neural Style Transfer C++ Sample](../../inference-engine/samples/style_transfer_sample/README.md)
- [Neural Style Transfer Python* Sample](../../inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md)
-- **[nGraph Function Creation C++ Sample](../../inference-engine/samples/ngraph_function_creation_sample/README.md)** – Construction of the LeNet network using the nGraph function creation sample.
+- **nGraph Function Creation Sample** – Construction of the LeNet network using the nGraph function creation sample.
+ - [nGraph Function Creation C++ Sample](../../inference-engine/samples/ngraph_function_creation_sample/README.md)
+ - [nGraph Function Creation Python Sample](../../inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md)
- **Object Detection for SSD Sample** – Inference of object detection networks based on the SSD, this sample is simplified version that supports only images as inputs.
- [Object Detection for SSD C++ Sample](../../inference-engine/samples/object_detection_sample_ssd/README.md)
- [Object Detection for SSD C Sample](../../inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md)
@@ -39,7 +43,7 @@ Inference Engine sample applications include the following:
## Media Files Available for Samples
-To run the sample applications, you can use images and videos from the media files collection available at https://github.com/intel-iot-devkit/sample-videos.
+To run the sample applications, you can use images and videos from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
## Samples that Support Pre-Trained Models
diff --git a/docs/IE_DG/supported_plugins/GNA.md b/docs/IE_DG/supported_plugins/GNA.md
index 82e168997056d2..f47297571840a4 100644
--- a/docs/IE_DG/supported_plugins/GNA.md
+++ b/docs/IE_DG/supported_plugins/GNA.md
@@ -69,7 +69,7 @@ Limitations include:
- Only 1D convolutions are natively supported.
- The number of output channels for convolutions must be a multiple of 4.
- Permute layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
-- Concatenations and splitting are supported only along the channel dimension (axis=1).
+- Splits and concatenations are supported for contiguous portions of memory (e.g., a split of 1,2,3,4 into 1,1,3,4 and 1,1,3,4, or a concat of 1,2,3,4 and 1,2,3,4 into 2,2,3,4).
#### Experimental Support for 2D Convolutions
diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md
index 7e28a7ac0533e3..84821d6b41c87c 100644
--- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md
+++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md
@@ -15,7 +15,7 @@ through Tensorflow* tf.feature_column API. Table below presents what feature typ
**Step 1**. Clone the GitHub repository with TensorFlow models and move to the directory with an example of Wide and Deep model:
```sh
-git clone https://github.com/tensorflow/models.git;
+git clone https://github.com/tensorflow/models.git --branch r2.2.0;
cd official/r1/wide_deep
```
diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml
index 1049d4d328372f..a6f5dd3250c818 100644
--- a/docs/doxygen/ie_docs.xml
+++ b/docs/doxygen/ie_docs.xml
@@ -234,7 +234,7 @@ limitations under the License.
-
+
diff --git a/docs/how_tos/POT_how_to_example.md b/docs/how_tos/POT_how_to_example.md
new file mode 100644
index 00000000000000..571269a92ff437
--- /dev/null
+++ b/docs/how_tos/POT_how_to_example.md
@@ -0,0 +1,163 @@
+# Post-Training Optimization Tool - A real example
+
+This tutorial describes the example from the following YouTube* video:
+
+http://XXXXX
+
+Watch this video to learn the basics of Post-training Optimization Tool (POT):
+ https://www.youtube.com/watch?v=SvkI25Ca_SQ
+
+The example has been tested with OpenVINO™ 2021 on the Ubuntu 18.04 operating system.
+
+
+## 1. Installation
+
+Install the OpenVINO™ toolkit with the Model Optimizer, Accuracy Checker, and Post-training Optimization Tool components.
+
+1. Define the OpenVINO™ install directory:
+```
+export OV=/opt/intel/openvino_2021/
+```
+2. Install the Model Optimizer prerequisites:
+```
+cd $OV/deployment_tools/model_optimizer/install_prerequisites
+sudo ./install_prerequisites.sh
+```
+3. Install the Accuracy Checker requirements:
+```
+cd $OV/deployment_tools/open_model_zoo/tools/accuracy_checker
+sudo python3 setup.py install
+```
+4. Install the Post-training Optimization Tool:
+```
+cd $OV/deployment_tools/tools/post_training_optimization_toolkit
+sudo python3 setup.py install
+```
+
+## 2. Download Model
+
+This tutorial uses the MobileNet v2 model from the PyTorch* framework. You can choose any other model.
+
+Download the MobileNet v2 PyTorch* model using the commands below:
+```
+mkdir ~/POT
+```
+```
+cd ~/POT
+```
+```
+python3 $OV/deployment_tools/tools/model_downloader/downloader.py --name mobilenet-v2-pytorch -o .
+```
+
+## 3. Prepare Model for Inference
+
+Install requirements for PyTorch using the commands below:
+```
+cd $OV/deployment_tools/open_model_zoo/tools/downloader
+```
+```
+python3 -mpip install --user -r ./requirements-pytorch.in
+```
+
+You can find the parameters for MobileNet v2 conversion here:
+```
+vi /opt/intel/openvino_2021/deployment_tools/open_model_zoo/models/public/mobilenet-v2-pytorch/model.yml
+```
+
+Convert the model from PyTorch to ONNX*:
+```
+cd ~/POT/public/mobilenet-v2-pytorch
+python3 /opt/intel/openvino_2021/deployment_tools/open_model_zoo/tools/downloader/pytorch_to_onnx.py \
+ --model-name=MobileNetV2 \
+ --model-path=. \
+ --weights=mobilenet-v2.pth \
+ --import-module=MobileNetV2 \
+ --input-shape=1,3,224,224 \
+ --output-file=mobilenet-v2.onnx \
+ --input-names=data \
+ --output-names=prob
+
+```
+Convert the model from ONNX to the OpenVINO™ Intermediate Representation (IR):
+```
+mo_onnx.py \
+ -m mobilenet-v2.onnx \
+ --input=data \
+ --mean_values=data[123.675,116.28,103.53] \
+ --scale_values=data[58.624,57.12,57.375] \
+ --reverse_input_channels \
+ --output=prob
+```
+
+Move the IR files to the ~/POT directory:
+
+```
+mv mobilenet-v2.xml ~/POT/model.xml
+mv mobilenet-v2.bin ~/POT/model.bin
+```
+
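+Optionally, you can sanity-check the IR with the Inference Engine Python API before calibration. The snippet below is only an illustrative sketch (not part of the video tutorial); it assumes the environment is set up via `setupvars.sh`, that the IR files were moved to `~/POT` as above, and that the input name is `data`, as set during the ONNX conversion:
+
+```python
+import numpy as np
+from openvino.inference_engine import IECore
+
+ie = IECore()
+net = ie.read_network(model="model.xml", weights="model.bin")  # run from ~/POT
+exec_net = ie.load_network(network=net, device_name="CPU")
+
+# Feed a random NCHW blob just to confirm the model loads and runs
+blob = np.random.rand(1, 3, 224, 224).astype(np.float32)
+result = exec_net.infer(inputs={"data": blob})
+print({name: out.shape for name, out in result.items()})
+```
+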
+## 4. Edit Configurations
+
+Edit the configuration files:
+```
+sudo vi $OV/deployment_tools/open_model_zoo/tools/accuracy_checker/dataset_definitions.yml
+# edit the imagenet_1000_classes section
+```
+```
+export DEFINITIONS_FILE=/opt/intel/openvino_2021/deployment_tools/open_model_zoo/tools/accuracy_checker/dataset_definitions.yml
+```
+
+Copy the JSON file to the ~/POT directory and edit it:
+
+```
+cp $OV/deployment_tools/tools/post_training_optimization_toolkit/configs/examples/quantization/classification/mobilenetV2_pytorch_int8.json ~/POT
+```
+```
+vi mobilenetV2_pytorch_int8.json
+```
+
+Copy the YML file to the ~/POT directory and edit it:
+
+```
+cp /opt/intel/openvino_2021/deployment_tools/open_model_zoo/tools/accuracy_checker/configs/mobilenet-v2.yml ~/POT
+```
+```
+vi mobilenet-v2.yml
+```
+
+## 5. Run Baseline
+
+Run Accuracy Checker on the original model:
+
+```
+accuracy_check -c mobilenet-v2.yml
+```
+
+Install the Benchmark Tool first. To learn more about the Benchmark Tool, refer to the [Benchmark C++ Tool](https://docs.openvinotoolkit.org/latest/openvino_inference_engine_samples_benchmark_app_README.html)
+ or the [Benchmark Python* Tool](https://docs.openvinotoolkit.org/latest/openvino_inference_engine_tools_benchmark_tool_README.html).
+
+Run performance benchmark:
+```
+~/inference_engine_cpp_samples_build/intel64/Release/benchmark_app -m ~/POT/model.xml
+```
+
+## 6. Run Integer Calibration
+
+You can edit the JSON file to switch between two modes of calibration:
+
+ - AccuracyAwareQuantization
+ - DefaultQuantization
+
+
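+To switch modes, change the algorithm name in the copied JSON file. The sketch below is only illustrative; it assumes the usual POT config layout with a `compression.algorithms` list, which may differ between releases:
+
+```python
+import json
+
+cfg_path = "mobilenetV2_pytorch_int8.json"  # the copy in ~/POT
+with open(cfg_path) as f:
+    cfg = json.load(f)
+
+# Assumed layout: "compression" -> "algorithms" -> [{"name": ...}, ...]
+cfg["compression"]["algorithms"][0]["name"] = "DefaultQuantization"  # or "AccuracyAwareQuantization"
+
+with open(cfg_path, "w") as f:
+    json.dump(cfg, f, indent=4)
+```
+
+Then run the calibration:
+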
+```
+pot --config ~/POT/mobilenetV2_pytorch_int8.json \
+ --output-dir ~/POT/ \
+ --evaluate \
+ --log-level INFO
+```
+
+Run the Benchmark Tool for the calibrated model. Make sure the model path contains `DefaultQuantization/.../optimized/...`.
+
+```
+~/inference_engine_cpp_samples_build/intel64/Release/benchmark_app -m mobilenetv2_DefaultQuantization/2021-03-07/optimized/mobilenetv2.xml
+```
diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md
index 9504475f6b6466..3da7e3c1088fd8 100644
--- a/docs/install_guides/pypi-openvino-dev.md
+++ b/docs/install_guides/pypi-openvino-dev.md
@@ -51,7 +51,11 @@ python -m pip install --user virtualenv
python -m venv openvino_env --system-site-packages
```
-Activate virtual environment:
+> **NOTE**: On Linux and macOS, you may need to type `python3` instead of
+`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
+
+### Step 2. Activate Virtual Environment
+
On Linux and macOS:
```sh
source openvino_env/bin/activate
@@ -61,14 +65,14 @@ On Windows:
openvino_env\Scripts\activate
```
-### Step 2. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update pip to the Highest Version
Run the command below:
```sh
python -m pip install --upgrade pip
```
-### Step 3. Install the Package
+### Step 4. Install the Package
Run the command below:
@@ -76,13 +80,13 @@ Run the command below:
pip install openvino-dev
```
-### Step 4. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
-Run the command below:
+Run the command below (this may take a few seconds):
```sh
-python -c "pot -h"
+pot -h
```
-
+
You will see the help message for Post-Training Optimization Tool if installation finished successfully.
## Additional Resources
@@ -90,4 +94,3 @@ You will see the help message for Post-Training Optimization Tool if installatio
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
-
diff --git a/docs/install_guides/pypi-openvino-rt.md b/docs/install_guides/pypi-openvino-rt.md
index 1f44344b3e95f6..9b825c8651873a 100644
--- a/docs/install_guides/pypi-openvino-rt.md
+++ b/docs/install_guides/pypi-openvino-rt.md
@@ -48,7 +48,11 @@ python -m pip install --user virtualenv
python -m venv openvino_env --system-site-packages
```
-Activate virtual environment:
+> **NOTE**: On Linux and macOS, you may need to type `python3` instead of
+`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
+
+### Step 2. Activate Virtual Environment
+
On Linux and macOS:
```sh
source openvino_env/bin/activate
@@ -58,14 +62,14 @@ On Windows:
openvino_env\Scripts\activate
```
-### Step 2. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update pip to the Highest Version
Run the command below:
```sh
python -m pip install --upgrade pip
```
-### Step 3. Install the Package
+### Step 4. Install the Package
Run the command below:
@@ -73,7 +77,7 @@ Run the command below:
pip install openvino
```
-### Step 4. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
Run the command below:
```sh
diff --git a/docs/ops/activation/Elu_1.md b/docs/ops/activation/Elu_1.md
index 8d5d424d02312b..60a093c506fff6 100644
--- a/docs/ops/activation/Elu_1.md
+++ b/docs/ops/activation/Elu_1.md
@@ -8,29 +8,58 @@
**Detailed Description**
-For each element from the input tensor calculates corresponding
-element in the output tensor with the following formula:
+*Elu* operation is introduced in this [article](https://arxiv.org/abs/1511.07289v3).
+It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
+
\f[
-elu(x) = \left\{\begin{array}{ll}
- alpha(e^{x} - 1) \quad \mbox{if } x < 0 \\
- x \quad \mbox{if } x \geq 0
+Elu(x) = \left\{\begin{array}{r}
+ x \qquad \mbox{if } x > 0 \\
+ \alpha(e^{x} - 1) \quad \mbox{if } x \leq 0
\end{array}\right.
\f]
+where α corresponds to the *alpha* attribute.
+
+*Elu* is equivalent to the *ReLU* operation when *alpha* is equal to zero.
+
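+For intuition only, here is a small NumPy sketch of the element-wise computation (illustrative, not part of the specification):
+
+```python
+import numpy as np
+
+def elu(x: np.ndarray, alpha: float) -> np.ndarray:
+    # Positive values pass through; alpha scales the exponential branch.
+    return np.where(x > 0, x, alpha * (np.exp(x) - 1))
+
+print(elu(np.array([-2.0, -0.5, 0.0, 3.0]), alpha=1.0))  # alpha=1.0 is just an example value
+```
+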
**Attributes**
* *alpha*
* **Description**: scale for the negative factor
- * **Range of values**: arbitrary floating point number
- * **Type**: float
+ * **Range of values**: non-negative arbitrary floating-point number
+ * **Type**: `float`
* **Default value**: none
* **Required**: *yes*
**Inputs**:
-* **1**: Input tensor x of any floating point type. Required.
+* **1**: A tensor of type `T` and arbitrary shape. **Required**.
**Outputs**:
-* **1**: Result of Elu function applied to the input tensor *x*. Floating point tensor with shape and type matching the input tensor.
+* **1**: The result of element-wise *Elu* function applied to the input tensor. A tensor of type `T` and the same shape as input tensor.
+
+**Types**
+
+* *T*: arbitrary supported floating-point type.
+
+**Example**
+
+```xml
+
+
+
+
+ 1
+ 128
+
+
+
+
+```
diff --git a/docs/ops/activation/Exp_1.md b/docs/ops/activation/Exp_1.md
index b5815a271603cb..fad5c24c3bd1bd 100644
--- a/docs/ops/activation/Exp_1.md
+++ b/docs/ops/activation/Exp_1.md
@@ -6,12 +6,43 @@
**Short description**: Exponential element-wise activation function.
-**Attributes**: has no attributes
+**Detailed description**
-**Inputs**:
+*Exp* performs element-wise exponential activation function on a given input tensor. The mathematical formula is as follows:
-* **1**: Input tensor x of any floating point type. Required.
+\f[
+exp(x) = e^{x}
+\f]
-**Outputs**:
+**Attributes**: *Exp* operation has no attributes.
-* **1**: Result of Exp function applied to the input tensor *x*. Floating point tensor with shape and type matching the input tensor.
+**Inputs**
+
+* **1**: A tensor of type `T` and arbitrary shape. **Required**.
+
+**Outputs**
+
+* **1**: The result of element-wise *Exp* function applied to the input tensor. A tensor of type `T` and the same shape as input tensor.
+
+**Types**
+
+* *T*: arbitrary supported floating-point type.
+
+**Example**
+
+```xml
+
+
+
+ 1
+ 256
+
+
+
+
+```
\ No newline at end of file
diff --git a/docs/ops/activation/GELU_2.md b/docs/ops/activation/GELU_2.md
index 461defb02c9e56..9f48eba8791c06 100644
--- a/docs/ops/activation/GELU_2.md
+++ b/docs/ops/activation/GELU_2.md
@@ -2,35 +2,40 @@
**Versioned name**: *Gelu-2*
-**Category**: *Activation*
+**Category**: *Activation function*
-**Short description**: [Reference](https://pytorch.org/docs/stable/nn.functional.html#gelu)
+**Short description**: Gaussian error linear unit element-wise activation function.
-**Detailed description**: [Reference](https://arxiv.org/abs/1606.08415)
+**Detailed description**
-**Attributes**: *Gelu* operation has no attributes.
-
-**Mathematical Formulation**
-Gelu(x)=x*Φ(x), where Φ(x) is the Cumulative Distribution Function for Gaussian Distribution.
-The following equivalent combination is recognized and fused into single Gelu op:
+*Gelu* operation is introduced in this [article](https://arxiv.org/abs/1606.08415).
+It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
\f[
- Gelu(x) = 0.5*x*(1.0 + erf((x) / \sqrt{2})
+ Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
\f]
-Similarly, the following Gelu approximation (typical for the TensorFlow*) is recognized and fused into single Gelu op
+where Φ(x) is the Cumulative Distribution Function for Gaussian Distribution.
+
+Additionally, *Gelu* function may be approximated as follows:
\f[
- Gelu(x) \approx 0.5x(1.0 + tanh(\sqrt{2.0/pi} * (x + 0.044715 * x ^ 3))
+ Gelu(x) \approx 0.5\cdot x\cdot \left(1 + \tanh\left[\sqrt{2/\pi} \cdot (x + 0.044715 \cdot x^3)\right]\right)
\f]
+**Attributes**: *Gelu* operation has no attributes.
+
**Inputs**:
-* **1**: Multidimensional input tensor. Required.
+* **1**: A tensor of type `T` and arbitrary shape. **Required**.
**Outputs**:
-* **1**: Floating point tensor with shape and type matching the input tensor.
+* **1**: The result of element-wise *Gelu* function applied to the input tensor. A tensor of type `T` and the same shape as input tensor.
+
+**Types**
+
+* *T*: arbitrary supported floating-point type.
**Example**
diff --git a/docs/ops/activation/Mish_4.md b/docs/ops/activation/Mish_4.md
index 8eda674f5039f4..30eebc71c643b3 100644
--- a/docs/ops/activation/Mish_4.md
+++ b/docs/ops/activation/Mish_4.md
@@ -2,35 +2,35 @@
**Versioned name**: *Mish-4*
-**Category**: *Activation*
+**Category**: *Activation function*
-**Short description**: Mish is a Self Regularized Non-Monotonic Neural Activation Function.
+**Short description**: *Mish* is a Self Regularized Non-Monotonic Neural Activation Function.
-**Detailed description**: Mish is a self regularized non-monotonic neural activation function proposed in the [article](https://arxiv.org/abs/1908.08681).
+**Detailed description**
-**Attributes**: operation has no attributes.
+*Mish* is a self regularized non-monotonic neural activation function proposed in this [article](https://arxiv.org/abs/1908.08681v2).
+
+*Mish* performs element-wise activation function on a given input tensor, based on the following mathematical formula:
+
+\f[
+Mish(x) = x\cdot\tanh\big(SoftPlus(x)\big) = x\cdot\tanh\big(\ln(1+e^{x})\big)
+\f]
+
+**Attributes**: *Mish* operation has no attributes.
**Inputs**:
-* **1**: Input tensor *x* of any floating point type T. Required.
+* **1**: A tensor of type `T` and arbitrary shape. **Required**.
**Outputs**:
-* **1**: Floating point tensor with shape and type matching the input tensor.
+* **1**: The result of element-wise *Mish* function applied to the input tensor. A tensor of type `T` and the same shape as input tensor.
**Types**
-* *T*: any floating point type.
-
-**Mathematical Formulation**
-
- For each element from the input tensor calculates corresponding
- element in the output tensor with the following formula:
-\f[
-Mish(x) = x*tanh(ln(1.0+e^{x}))
-\f]
+* *T*: arbitrary supported floating-point type.
-**Examples**
+**Example**
```xml
diff --git a/docs/ops/activation/PReLU_1.md b/docs/ops/activation/PReLU_1.md
index 74920e1306be47..ea0d4d05b2f603 100644
--- a/docs/ops/activation/PReLU_1.md
+++ b/docs/ops/activation/PReLU_1.md
@@ -2,32 +2,114 @@
**Versioned name**: *PReLU-1*
-**Category**: Activation function
+**Category**: *Activation function*
-**Short description**: *PReLU* performs element-wise parametric ReLU operation with negative slope defined by the second input.
+**Short description**: Parametric rectified linear unit element-wise activation function.
-**Attributes**: operation has no attributes.
+**Detailed description**
-**Inputs**
+*PReLU* operation is introduced in this [article](https://arxiv.org/abs/1502.01852v1).
+
+*PReLU* performs element-wise parametric *ReLU* operation on a given input tensor, based on the following mathematical formula:
+
+\f[
+PReLU(x) = \left\{\begin{array}{r}
+ x \quad \mbox{if } x \geq 0 \\
+ \alpha x \quad \mbox{if } x < 0
+\end{array}\right.
+\f]
+
+where α is a learnable parameter and corresponds to the negative slope, per channel, defined by the second input `slope`.
+
+Another mathematical representation that may be found in other references:
+
+\f[
+PReLU(x) = \max(0, x) + \alpha\cdot\min(0, x)
+\f]
-* **1**: `X` - Input tensor of any supported floating point type T1. Required.
-* **2**: `slope` - Tensor with negative slope values of type T2. The shape of the tensor should be broadcastable to input 1. Required.
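+For intuition only, a small NumPy sketch of the per-channel broadcast described above (illustrative, not part of the specification):
+
+```python
+import numpy as np
+
+def prelu(data: np.ndarray, slope: np.ndarray) -> np.ndarray:
+    # One slope value per channel; channels are the second dimension of `data`
+    # (or the only dimension when the rank of `data` is less than 2).
+    axis = 1 if data.ndim >= 2 else 0
+    shape = [1] * data.ndim
+    shape[axis] = -1
+    alpha = slope.reshape(shape)  # broadcast slope over the remaining axes
+    return np.where(data >= 0, data, alpha * data)
+
+x = np.random.randn(1, 20, 128, 128).astype(np.float32)
+y = prelu(x, slope=np.full(20, 0.25, dtype=np.float32))
+```
+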
+**Attributes**: *PReLU* operation has no attributes.
+
+**Inputs**
+
+* **1**: `data`. A tensor of type `T` and arbitrary shape. **Required**.
+* **2**: `slope`. 1D tensor of type `T`. Tensor with negative slope values, one per channel dimension of `data` input tensor. **Required**.
+* **Note**: The channels dimension corresponds to the second dimension of the `data` input tensor. If the rank of `data` is less than 2, the number of channels is 1.
**Outputs**
-* **1**: The result of element-wise PReLU operation applied for tensor from input 1 with slope values from input 2. A tensor of type T1 and shape matching shape of input *x* tensor.
+* **1**: The result of element-wise *PReLU* operation applied to `data` input tensor with negative slope values from `slope` input tensor. A tensor of type `T` and the same shape as `data` input tensor.
**Types**
-* *T1*: arbitrary supported floating point type.
+* *T*: arbitrary supported floating-point type.
-* *T2*: arbitrary supported floating point type.
+**Examples**
-**Detailed description**
-Before performing addition operation, input tensor 2 with slope values is broadcasted to input 1.
-The broadcasting rules are aligned with ONNX Broadcasting. Description is available in ONNX docs.
+*Example: 1D input tensor `data`*
+
+```xml
+
+
+
+ 128
+
+
+ 1
+
+
+
+
+```
+
+*Example: 2D input tensor `data`*
+
+```xml
+
+
+
+ 20
+ 128
+
+
+ 128
+
+
+
+
+```
-After broadcasting *PReLU* does the following for each input 1 element x:
+*Example: 4D input tensor `data`*
- f(x) = slope * x for x < 0; x for x >= 0
\ No newline at end of file
+```xml
+
+
+
+ 1
+ 20
+ 128
+ 128
+
+
+ 20
+
+
+
+
+```
diff --git a/docs/ops/activation/Selu_1.md b/docs/ops/activation/Selu_1.md
new file mode 100644
index 00000000000000..0af534b8f56292
--- /dev/null
+++ b/docs/ops/activation/Selu_1.md
@@ -0,0 +1,71 @@
+## Selu {#openvino_docs_ops_activation_Selu_1}
+
+**Versioned name**: *Selu-1*
+
+**Category**: *Activation function*
+
+**Short description**: *Selu* is a scaled exponential linear unit element-wise activation function.
+
+**Detailed Description**
+
+*Selu* operation is introduced in this [article](https://arxiv.org/abs/1706.02515), as activation function for self-normalizing neural networks (SNNs).
+
+*Selu* performs element-wise activation function on a given input tensor `data`, based on the following mathematical formula:
+
+\f[
+Selu(x) = \lambda \left\{\begin{array}{r}
+ x \quad \mbox{if } x > 0 \\
+ \alpha(e^{x} - 1) \quad \mbox{if } x \le 0
+\end{array}\right.
+\f]
+
+where α and λ correspond to inputs `alpha` and `lambda` respectively.
+
+Another mathematical representation that may be found in other references:
+
+\f[
+Selu(x) = \lambda\cdot\big(\max(0, x) + \min(0, \alpha(e^{x}-1))\big)
+\f]
+
+**Attributes**: *Selu* operation has no attributes.
+
+**Inputs**
+
+* **1**: `data`. A tensor of type `T` and arbitrary shape. **Required.**
+
+* **2**: `alpha`. 1D tensor with one element of type `T`. **Required.**
+
+* **3**: `lambda`. 1D tensor with one element of type `T`. **Required.**
+
+**Outputs**
+
+* **1**: The result of element-wise *Selu* function applied to `data` input tensor. A tensor of type `T` and the same shape as `data` input tensor.
+
+**Types**
+
+* *T*: arbitrary supported floating-point type.
+
+**Example**
+
+```xml
+
+
+
+ 256
+ 56
+
+
+ 1
+
+
+ 1
+
+
+
+
+```
diff --git a/docs/ops/activation/SoftPlus_4.md b/docs/ops/activation/SoftPlus_4.md
index 135c4cb9dccae4..8afc94684acb7a 100644
--- a/docs/ops/activation/SoftPlus_4.md
+++ b/docs/ops/activation/SoftPlus_4.md
@@ -2,15 +2,18 @@
**Versioned name**: *SoftPlus-4*
-**Category**: *Activation*
+**Category**: *Activation function*
-**Short description**: SoftPlus takes one input tensor and produces output tensor where the softplus function is applied to the tensor elementwise.
+**Short description**: *SoftPlus* is a rectified-based element-wise activation function.
-**Detailed description**: For each element from the input tensor calculates corresponding
-element in the output tensor with the following formula:
+**Detailed description**
+
+*SoftPlus* operation is introduced in this [article](https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.6419).
+
+*SoftPlus* performs element-wise activation function on a given input tensor, based on the following mathematical formula:
\f[
-SoftPlus(x) = ln(e^{x} + 1.0)
+SoftPlus(x) = \ln(1+e^{x})
\f]
**Attributes**: *SoftPlus* operation has no attributes.
@@ -18,16 +21,15 @@ SoftPlus(x) = ln(e^{x} + 1.0)
**Inputs**:
-* **1**: Multidimensional input tensor of type *T*. **Required**.
+* **1**: A tensor of type `T` and arbitrary shape. **Required**.
**Outputs**:
-* **1**: The resulting tensor of the same shape and type as input tensor.
+* **1**: The result of element-wise *SoftPlus* function applied to the input tensor. A tensor of type `T` and the same shape as input tensor.
**Types**
-* *T*: arbitrary supported floating point type.
-
+* *T*: arbitrary supported floating-point type.
**Example**
@@ -46,4 +48,4 @@ SoftPlus(x) = ln(e^{x} + 1.0)
-```
\ No newline at end of file
+```
diff --git a/docs/ops/activation/Swish_4.md b/docs/ops/activation/Swish_4.md
index 78bcb3866e7b91..1a8b7d1b51a4f9 100644
--- a/docs/ops/activation/Swish_4.md
+++ b/docs/ops/activation/Swish_4.md
@@ -2,38 +2,40 @@
**Versioned name**: *Swish-4*
-**Category**: *Activation*
+**Category**: *Activation function*
-**Short description**: Swish takes one input tensor and produces output tensor where the Swish function is applied to the tensor elementwise.
+**Short description**: *Swish* performs element-wise activation function on a given input tensor.
-**Detailed description**: For each element from the input tensor calculates corresponding
-element in the output tensor with the following formula:
+**Detailed description**
+
+*Swish* operation is introduced in this [article](https://arxiv.org/abs/1710.05941).
+It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
\f[
-Swish(x) = x / (1.0 + e^{-(beta * x)})
+Swish(x) = x\cdot \sigma(\beta x) = x \left(1 + e^{-(\beta x)}\right)^{-1}
\f]
-The Swish operation is introduced in the [article](https://arxiv.org/pdf/1710.05941.pdf).
+where β corresponds to the `beta` scalar input.
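+
+For intuition only, a small NumPy sketch of the computation (illustrative, not part of the specification):
+
+```python
+import numpy as np
+
+def swish(x: np.ndarray, beta: float = 1.0) -> np.ndarray:
+    # beta defaults to 1.0, matching the behaviour when the second input is absent.
+    return x / (1.0 + np.exp(-beta * x))
+
+print(swish(np.array([-2.0, 0.0, 2.0])))
+```
+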
-**Attributes**:
+**Attributes**: *Swish* operation has no attributes.
**Inputs**:
-* **1**: Multidimensional input tensor of type *T*. **Required**.
+* **1**: `data`. A tensor of type `T` and arbitrary shape. **Required**.
-* **2**: Scalar with non-negative value of type *T*. Multiplication parameter *beta* for the sigmoid. If the input is not connected then the default value 1.0 is used. **Optional**
+* **2**: `beta`. A non-negative scalar value of type `T`. Multiplication parameter for the sigmoid. If this input is not provided, the default value 1.0 is used. **Optional**.
**Outputs**:
-* **1**: The resulting tensor of the same shape and type as input tensor.
+* **1**: The result of element-wise *Swish* function applied to the input tensor `data`. A tensor of type `T` and the same shape as `data` input tensor.
**Types**
-* *T*: arbitrary supported floating point type.
-
+* *T*: arbitrary supported floating-point type.
-**Example**
+**Examples**
+*Example: Second input `beta` provided*
```xml
@@ -41,13 +43,30 @@ The Swish operation is introduced in the [article](https://arxiv.org/pdf/1710.05
25656
-
+
+
-```
\ No newline at end of file
+```
+
+*Example: Second input `beta` not provided*
+```xml
+
+
+
+ 128
+
+
+
+
+```
diff --git a/docs/ops/arithmetic/FloorMod_1.md b/docs/ops/arithmetic/FloorMod_1.md
index 26986df0f3c3dc..042ffb7f428696 100644
--- a/docs/ops/arithmetic/FloorMod_1.md
+++ b/docs/ops/arithmetic/FloorMod_1.md
@@ -13,7 +13,7 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ
o_{i} = a_{i} % b_{i}
\f]
-*FloorMod* operation computes a reminder of a floored division. It is the same behaviour like in Python programming language: `floor(x / y) * y + floor_mod(x, y) = x`. The sign of the result is equal to a sign of a dividend. The result of division by zero is undefined.
+*FloorMod* operation computes the remainder of a floored division. It has the same behavior as in the Python programming language: `floor(x / y) * y + floor_mod(x, y) = x`. The sign of the result is equal to the sign of the divisor. The result of division by zero is undefined.
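+
+For example, Python's built-in operators follow the same floored-division convention:
+
+```python
+# The sign of the result follows the divisor.
+print(7 % 3, -7 % 3, 7 % -3)           # 1, 2, -2
+assert (-7 // 3) * 3 + (-7 % 3) == -7  # floor(x / y) * y + floor_mod(x, y) == x
+```
+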
**Attributes**:
diff --git a/docs/ops/arithmetic/Selu_1.md b/docs/ops/arithmetic/Selu_1.md
deleted file mode 100644
index 8d69d13fbf2e37..00000000000000
--- a/docs/ops/arithmetic/Selu_1.md
+++ /dev/null
@@ -1,65 +0,0 @@
-## Selu {#openvino_docs_ops_arithmetic_Selu_1}
-
-**Versioned name**: *Selu-1*
-
-**Category**: Arithmetic unary operation
-
-**Short description**: *Selu* calculates the SELU activation function (https://arxiv.org/abs/1706.02515) element-wise with given tensor.
-
-**Detailed Description**
-
-For each element from the input tensor calculates corresponding
-element in the output tensor with the following formula:
-\f[
-selu(x) = \lambda \left\{\begin{array}{ll}
- \alpha(e^{x} - 1) \quad \mbox{if } x \le 0 \\
- x \quad \mbox{if } x > 0
-\end{array}\right.
-\f]
-
-**Attributes**:
-
- No attributes available.
-
-**Inputs**
-
-* **1**: An tensor of type T. **Required.**
-
-* **2**: `alpha` 1D tensor with one element of type T. **Required.**
-
-* **3**: `lambda` 1D tensor with one element of type T. **Required.**
-
-**Outputs**
-
-* **1**: The result of element-wise operation. A tensor of type T.
-
-**Types**
-
-* *T*: any supported floating point type.
-
-**Examples**
-
-*Example 1*
-
-```xml
-
-
-
- 256
- 56
-
-
- 1
-
-
- 1
-
-
-
-
-```
\ No newline at end of file
diff --git a/docs/ops/convolution/BinaryConvolution_1.md b/docs/ops/convolution/BinaryConvolution_1.md
index 6ab2458035c1c8..314e9aad029258 100644
--- a/docs/ops/convolution/BinaryConvolution_1.md
+++ b/docs/ops/convolution/BinaryConvolution_1.md
@@ -82,17 +82,17 @@ Computation algorithm for mode *xnor-popcount*:
**Inputs**:
-* **1**: Input tensor of type *T1* and rank 4. Layout is NCYX (number of batches, number of channels, spatial axes Y, X). Required.
-* **2**: Kernel tensor of type *T2* and rank 4. Layout is OIYX (number of output channels, number of input channels, spatial axes Y, X). Required.
+* **1**: Input tensor of type *T1* and rank 4. Layout is `[N, C_IN, Y, X]` (number of batches, number of channels, spatial axes Y, X). Required.
+* **2**: Kernel tensor of type *T2* and rank 4. Layout is `[C_OUT, C_IN, Y, X]` (number of output channels, number of input channels, spatial axes Y, X). Required.
* **Note**: Interpretation of tensor values is defined by *mode* attribute.
**Outputs**:
-* **1**: Output tensor of type *T3* and rank 4. Layout is NOYX (number of batches, number of kernel output channels, spatial axes Y, X).
+* **1**: Output tensor of type *T3* and rank 4. Layout is `[N, C_OUT, Y, X]` (number of batches, number of kernel output channels, spatial axes Y, X).
**Types**:
-* *T1*: floating point type with values `0` or `1`.
+* *T1*: any numeric type with values `0` or `1`.
* *T2*: `u1` type with binary values `0` or `1`.
* *T3*: *T1* type with full range of values.
diff --git a/docs/ops/convolution/Convolution_1.md b/docs/ops/convolution/Convolution_1.md
index ffdbbc508618a4..8e50b3ffada509 100644
--- a/docs/ops/convolution/Convolution_1.md
+++ b/docs/ops/convolution/Convolution_1.md
@@ -37,7 +37,7 @@ The receptive field in each layer is calculated using the formulas:
* **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(z, y, x)` axes for 3D convolutions and `(y, x)` axes for 2D convolutions. For example, *strides* equal `4,2,1` means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension.
* **Range of values**: integer values starting from 0
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
@@ -45,7 +45,7 @@ The receptive field in each layer is calculated using the formulas:
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input.
* **Range of values**: integer values starting from 0
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -54,7 +54,7 @@ The receptive field in each layer is calculated using the formulas:
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input.
* **Range of values**: integer values starting from 0
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -63,7 +63,7 @@ The receptive field in each layer is calculated using the formulas:
* **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1.
* **Range of values**: integer value starting from 0
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
@@ -74,15 +74,15 @@ The receptive field in each layer is calculated using the formulas:
* *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
* *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
* *valid* - do not use padding.
- * **Type**: string
+ * **Type**: `string`
* **Default value**: explicit
* **Required**: *no*
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
**Inputs**:
-* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is NCZYX (number of batches, number of channels, spatial axes Z, Y, X). Required.
-* **2**: Kernel tensor of type *T* and rank 3, 4 or 5. Layout is OIZYX (number of output channels, number of input channels, spatial axes Z, Y, X). Required.
+* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is `[N, C_IN, Z, Y, X]` (number of batches, number of channels, spatial axes Z, Y, X). Required.
+* **2**: Kernel tensor of type *T* and rank 3, 4 or 5. Layout is `[C_OUT, C_IN, Z, Y, X]` (number of output channels, number of input channels, spatial axes Z, Y, X). Required.
* **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute:
* 1D convolution (input tensors rank 3) means that there is only one spatial axis X
* 2D convolution (input tensors rank 4) means that there are two spatial axes Y, X
@@ -90,11 +90,11 @@ The receptive field in each layer is calculated using the formulas:
**Outputs**:
-* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is NOZYX (number of batches, number of kernel output channels, spatial axes Z, Y, X).
+* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is `[N, C_OUT, Z, Y, X]` (number of batches, number of kernel output channels, spatial axes Z, Y, X).
**Types**:
-* *T*: any floating point type.
+* *T*: any numeric type.
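For illustration, a minimal sketch of how *strides*, *pads_begin*, *pads_end* and *dilations* combine into one output spatial dimension (standard convolution shape arithmetic with floor rounding is assumed; `conv_output_dim` is a hypothetical helper, not part of the specification):

```python
# Hypothetical helper; assumes the standard floor-rounding convolution arithmetic.
def conv_output_dim(input_dim, kernel_dim, stride, pad_begin, pad_end, dilation):
    effective_kernel = dilation * (kernel_dim - 1) + 1
    return (input_dim + pad_begin + pad_end - effective_kernel) // stride + 1

# X axis of a 2D convolution: 224-wide input, 3-wide kernel, stride 2, pads 1/1, dilation 1.
print(conv_output_dim(224, 3, 2, 1, 1, 1))  # -> 112
```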
**Example**:
diff --git a/docs/ops/convolution/DeformableConvolution_1.md b/docs/ops/convolution/DeformableConvolution_1.md
index 446d6fd07bb8ac..2cba8d84039fe9 100644
--- a/docs/ops/convolution/DeformableConvolution_1.md
+++ b/docs/ops/convolution/DeformableConvolution_1.md
@@ -88,7 +88,7 @@
**Types**:
-* *T*: Any floating point type.
+* *T*: Any numeric type.
**Example**
diff --git a/docs/ops/convolution/GroupConvolutionBackpropData_1.md b/docs/ops/convolution/GroupConvolutionBackpropData_1.md
index 9c041a891499b1..2d76aa905ea25d 100644
--- a/docs/ops/convolution/GroupConvolutionBackpropData_1.md
+++ b/docs/ops/convolution/GroupConvolutionBackpropData_1.md
@@ -66,9 +66,9 @@
**Inputs**:
-* **1**: Input tensor of type `T1` and rank 3, 4 or 5. Layout is `NCZYX` (number of batches, number of channels, spatial axes Z, Y, X). Required.
+* **1**: Input tensor of type `T1` and rank 3, 4 or 5. Layout is `[N, GROUPS * C_IN, Z, Y, X]` (number of batches, number of channels, spatial axes Z, Y, X). Required.
-* **2**: Kernel tensor of type `T1` and rank 4, 5 or 6. Layout is `GOIZYX` (number of groups, number of output channels, number of input channels, spatial axes Z, Y, X). Required.
+* **2**: Kernel tensor of type `T1` and rank 4, 5 or 6. Layout is `[GROUPS, C_IN, C_OUT, Z, Y, X]` (number of groups, number of input channels, number of output channels, spatial axes Z, Y, X). Required.
* **3**: Output shape tensor of type `T2` and rank 1. It specifies spatial shape of the output. Optional.
* **Note** Number of groups is derived from the shape of the kernel and not specified by any attribute.
@@ -79,11 +79,11 @@
**Outputs**:
-* **1**: Output tensor of type `T1` and rank 3, 4 or 5 (the same as input *1*). Layout is `NOZYX` (number of batches, number of kernel output channels, spatial axes Z, Y, X).
+* **1**: Output tensor of type `T1` and rank 3, 4 or 5 (the same as input *1*). Layout is `[N, GROUPS * C_OUT, Z, Y, X]` (number of batches, number of kernel output channels, spatial axes Z, Y, X).
**Types**:
-* *T1*: any floating point type.
+* *T1*: any numeric type.
* *T2*: any integer type.
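As a shape-bookkeeping aid, a small sketch of the channel/group relation between the data input, the kernel and the output (spatial sizes are deliberately left out, since they depend on strides, pads and the optional output shape input; the helper name is illustrative only):

```python
# Sketch of the channel arithmetic only; layouts follow the input/kernel descriptions above.
def group_deconv_output_channels(data_shape, kernel_shape):
    n, grouped_c_in = data_shape[0], data_shape[1]               # [N, GROUPS * C_IN, ...]
    groups, c_in, c_out = kernel_shape[0], kernel_shape[1], kernel_shape[2]
    assert grouped_c_in == groups * c_in, "input channels must equal GROUPS * C_IN"
    return n, groups * c_out                                     # output is [N, GROUPS * C_OUT, ...]

print(group_deconv_output_channels([1, 8, 10, 10], [4, 2, 3, 3, 3]))  # -> (1, 12)
```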
**Example**
diff --git a/docs/ops/convolution/GroupConvolution_1.md b/docs/ops/convolution/GroupConvolution_1.md
index 33a34c6fa2ed4b..49d6d9c05a84fb 100644
--- a/docs/ops/convolution/GroupConvolution_1.md
+++ b/docs/ops/convolution/GroupConvolution_1.md
@@ -15,7 +15,7 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
* **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(z, y, x)` axes for 3D convolutions and `(y, x)` axes for 2D convolutions. For example, *strides* equal `4,2,1` means sliding the filter 4 pixel at a time over depth dimension, 2 over height dimension and 1 over width dimension.
* **Range of values**: positive integer numbers
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
@@ -23,7 +23,7 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input.
* **Range of values**: positive integer numbers
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -32,7 +32,7 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input.
* **Range of values**: positive integer numbers
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -41,7 +41,7 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
* **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1.
* **Range of values**: positive integer numbers
- * **Type**: int[]
+ * **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
@@ -52,15 +52,15 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
* *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
* *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
* *valid* - do not use padding.
- * **Type**: string
+ * **Type**: `string`
* **Default value**: explicit
* **Required**: *no*
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
**Inputs**:
-* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is NCZYX (number of batches, number of channels, spatial axes Z, Y, X). Required.
-* **2**: Convolution kernel tensor of type *T* and rank 4, 5 or 6. Layout is GOIZYX (number of groups, number of output channels, number of input channels, spatial axes Z, Y, X),
+* **1**: Input tensor of type *T* and rank 3, 4 or 5. Layout is `[N, GROUPS * C_IN, Z, Y, X]` (number of batches, number of channels, spatial axes Z, Y, X). Required.
+* **2**: Convolution kernel tensor of type *T* and rank 4, 5 or 6. Layout is `[GROUPS, C_OUT, C_IN, Z, Y, X]` (number of groups, number of output channels, number of input channels, spatial axes Z, Y, X),
* **Note** Number of groups is derived from the shape of the kernel and not specified by any attribute.
* **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute:
* 1D convolution (input tensors rank 3) means that there is only one spatial axis X
@@ -69,11 +69,11 @@ Neural Networks](https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76
**Outputs**:
-* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is NOZYX (number of batches, number of kernel output channels, spatial axes Z, Y, X).
+* **1**: Output tensor of type *T* and rank 3, 4 or 5. Layout is `[N, GROUPS * C_OUT, Z, Y, X]` (number of batches, number of output channels, spatial axes Z, Y, X).
**Types**:
-* *T*: any floating point type.
+* *T*: any numeric type.
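To make the grouping explicit, a minimal NumPy sketch of how the input channels are split into groups and the per-group results concatenated (`conv2d` is a hypothetical stand-in for an ordinary 2D convolution and is assumed, not defined here):

```python
import numpy as np

def group_convolution(data, kernel, conv2d):
    # data: [N, GROUPS * C_IN, Y, X]; kernel: [GROUPS, C_OUT, C_IN, kY, kX]
    groups, c_out, c_in = kernel.shape[0], kernel.shape[1], kernel.shape[2]
    assert data.shape[1] == groups * c_in
    outputs = []
    for g in range(groups):
        data_slice = data[:, g * c_in:(g + 1) * c_in]   # this group's input channels
        outputs.append(conv2d(data_slice, kernel[g]))   # [N, C_OUT, Y', X']
    return np.concatenate(outputs, axis=1)              # [N, GROUPS * C_OUT, Y', X']
```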
**Example**:
1D GroupConvolution
diff --git a/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md b/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md
index 4ef48bb0cce8a6..69411e3f31f63b 100644
--- a/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md
+++ b/docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md
@@ -4,13 +4,13 @@
**Category**: Object detection
-**Short description**: An operation *ExperimentalDetectronDetectionOutput* performs non-maximum suppression to generate
+**Short description**: The *ExperimentalDetectronDetectionOutput* operation performs non-maximum suppression to generate
the detection output using information on location and score predictions.
-**Detailed description**: Operation doing next steps:
+**Detailed description**: The operation performs the following steps:
1. Applies deltas to boxes sizes [x1, y1, x2, y2] and takes coordinates of
-refined boxes according to formulas:
+refined boxes according to the formulas:
`x1_new = ctr_x + (dx - 0.5 * exp(min(d_log_w, max_delta_log_wh))) * box_w`
@@ -20,20 +20,20 @@ refined boxes according to formulas:
`y1_new = ctr_y + (dy + 0.5 * exp(min(d_log_h, max_delta_log_wh))) * box_h - 1.0`
-* `box_w` and `box_h` are width and height of box:
+* `box_w` and `box_h` are width and height of box, respectively:
`box_w = x1 - x0 + 1.0`
`box_h = y1 - y0 + 1.0`
-* `ctr_x` and `ctr_y` are center location of box:
+* `ctr_x` and `ctr_y` are center location of a box:
`ctr_x = x0 + 0.5f * box_w`
`ctr_y = y0 + 0.5f * box_h`
-* `dx`, `dy`, `d_log_w` and `d_log_h` are deltas calculated according to next formulas and `deltas_tensor` is second
-input:
+* `dx`, `dy`, `d_log_w` and `d_log_h` are deltas calculated according to the formulas below, and `deltas_tensor` is the
+second input:
`dx = deltas_tensor[roi_idx, 4 * class_idx + 0] / deltas_weights[0]`
@@ -43,21 +43,21 @@ input:
`d_log_h = deltas_tensor[roi_idx, 4 * class_idx + 3] / deltas_weights[3]`
-2. If *class_agnostic_box_regression* is `true` then operation removes predictions for background classes;
-3. Clips boxes to image;
-4. Applies *score_threshold* on detection scores;
+2. If *class_agnostic_box_regression* is `true`, removes predictions for background classes.
+3. Clips boxes to the image.
+4. Applies *score_threshold* on detection scores.
5. Applies non-maximum suppression class-wise with *nms_threshold* and returns *post_nms_count* or less detections per
-class;
-6. Operation returns *max_detections_per_image* detections if total number of detections is more than it, otherwise
-returns total number of detections and the output tensor is filled with undefined values for rest output tensor
-elements.
+class.
+6. Returns *max_detections_per_image* detections if the total number of detections is more than *max_detections_per_image*;
+otherwise, returns the total number of detections, and the output tensor is filled with undefined values for the rest of the
+output tensor elements.
**Attributes**:
* *score_threshold*
- * **Description**: *score_threshold* attribute specifies threshold to consider only detections whose score are
- larger than a threshold.
+ * **Description**: The *score_threshold* attribute specifies a threshold to consider only detections whose score is
+ larger than the threshold.
* **Range of values**: non-negative floating point number
* **Type**: float
* **Default value**: None
@@ -65,7 +65,7 @@ elements.
* *nms_threshold*
- * **Description**: *nms_threshold* attribute specifies threshold to be used in the NMS stage.
+ * **Description**: The *nms_threshold* attribute specifies a threshold to be used in the NMS stage.
* **Range of values**: non-negative floating point number
* **Type**: float
* **Default value**: None
@@ -73,7 +73,7 @@ elements.
* *num_classes*
- * **Description**: *num_classes* attribute specifies number of detected classes.
+ * **Description**: The *num_classes* attribute specifies the number of detected classes.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: None
@@ -81,7 +81,7 @@ elements.
* *post_nms_count*
- * **Description**: *post_nms_count* attribute specifies the maximal number of detections per class.
+ * **Description**: The *post_nms_count* attribute specifies the maximal number of detections per class.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: None
@@ -89,7 +89,7 @@ elements.
* *max_detections_per_image*
- * **Description**: *max_detections_per_image* attribute specifies maximal number of detections per image.
+ * **Description**: The *max_detections_per_image* attribute specifies the maximal number of detections per image.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: None
@@ -101,14 +101,14 @@ elements.
classes or not.
* **Range of values**:
* `true` means background classes should be deleted
- * `false` means background classes shouldn't be deleted
+ * `false` means background classes should not be deleted
* **Type**: boolean
* **Default value**: false
* **Required**: *no*
* *max_delta_log_wh*
- * **Description**: *max_delta_log_wh* attribute specifies maximal delta of logarithms for width and height.
+ * **Description**: The *max_delta_log_wh* attribute specifies maximal delta of logarithms for width and height.
* **Range of values**: floating point number
* **Type**: float
* **Default value**: None
@@ -116,7 +116,7 @@ elements.
* *deltas_weights*
- * **Description**: *deltas_weights* attribute specifies weights for bounding boxes sizes deltas.
+ * **Description**: The *deltas_weights* attribute specifies weights for bounding box size deltas.
* **Range of values**: a list of non-negative floating point numbers
* **Type**: float[]
* **Default value**: None
@@ -124,27 +124,25 @@ elements.
**Inputs**
-* **1**: A 2D tensor of type *T* with input ROIs, with shape `[number_of_ROIs, 4]` describing the ROIs as 4-tuples:
-[x1, y1, x2, y2]. The batch dimension of first, second and third inputs
+* **1**: A 2D tensor of type *T* with input ROIs, with shape `[number_of_ROIs, 4]` providing the ROIs as 4-tuples:
+[x1, y1, x2, y2]. The batch dimension of first, second, and third inputs
should be the same. **Required.**
-* **2**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes * 4]` describing deltas for input boxes.
+* **2**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes * 4]` providing deltas for input boxes.
**Required.**
-* **3**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes]` describing detections scores. **Required.**
+* **3**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes]` providing detections scores. **Required.**
-* **4**: A 2D tensor of type *T* with shape `[1, 3]` contains 3 elements
- `[image_height, image_width, scale_height_and_width]` describing input image size info. **Required.**
+* **4**: A 2D tensor of type *T* with shape `[1, 3]` that contains three elements
+ `[image_height, image_width, scale_height_and_width]` providing input image size info. **Required.**
**Outputs**
-* **1**: A 2D tensor of type *T* with shape `[max_detections_per_image, 4]` describing boxes indices.
+* **1**: A 2D tensor of type *T* with shape `[max_detections_per_image, 4]` providing boxes indices.
-* **2**: A 1D tensor of type *T_IND* with shape `[max_detections_per_image]` describing classes indices.
+* **2**: A 1D tensor of type *T_IND* with shape `[max_detections_per_image]` providing classes indices.
-* **3**: A 1D tensor of type *T* with shape `[max_detections_per_image]` describing scores indices.
-
-* **4**: A 1D tensor of type *T_IND* with shape `[max_detections_per_image]` describing batches indices.
+* **3**: A 1D tensor of type *T* with shape `[max_detections_per_image]` providing scores indices.
**Types**
diff --git a/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md b/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md
index 7f8726f20d3ff8..ce1513ed2bb8d5 100644
--- a/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md
+++ b/docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md
@@ -4,26 +4,25 @@
**Category**: Object detection
-**Short description**: An operation *ExperimentalDetectronGenerateProposalsSingleImage* computes ROIs and their scores
+**Short description**: The *ExperimentalDetectronGenerateProposalsSingleImage* operation computes ROIs and their scores
based on input data.
-**Detailed description**: Operation doing next steps:
+**Detailed description**: The operation performs the following steps:
-1. Transposes and reshape predicted bounding boxes deltas and scores to get them into the same order as the anchors;
-2. Transforms anchors into proposals using deltas and clips proposals to image;
-3. Removes predicted boxes with either height or width < *min_size*;
-4. Sorts all `(proposal, score)` pairs by score from highest to lowest, order of pairs with equal scores is undefined;
-5. Takes top *pre_nms_count* proposals, if total number of proposals is less than *pre_nms_count* then operation takes
-all proposals;
-6. Applies non-maximum suppression with *nms_threshold*;
-7. Takes top *post_nms_count* proposals and return these top proposals and their scores. If total number of proposals
-is less than *post_nms_count* then operation returns output tensors filled by zeroes.
+1. Transposes and reshapes predicted bounding boxes deltas and scores to get them into the same order as the anchors.
+2. Transforms anchors into proposals using deltas and clips proposals to an image.
+3. Removes predicted boxes with either height or width < *min_size*.
+4. Sorts all `(proposal, score)` pairs by score from highest to lowest; order of pairs with equal scores is undefined.
+5. Takes top *pre_nms_count* proposals; if the total number of proposals is less than *pre_nms_count*, takes all proposals.
+6. Applies non-maximum suppression with *nms_threshold*.
+7. Takes top *post_nms_count* proposals and returns these top proposals and their scores. If the total number of proposals
+is less than *post_nms_count*, returns output tensors filled with zeroes.
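A minimal NumPy sketch of steps 4-7 above (sorting, pre/post-NMS top-k selection and zero padding); `nms` stands for a greedy non-maximum suppression returning indices of kept proposals and is assumed, not implemented here:

```python
import numpy as np

def select_proposals(proposals, scores, pre_nms_count, post_nms_count, nms_threshold, nms):
    order = np.argsort(-scores, kind="stable")[:pre_nms_count]                  # steps 4-5
    proposals, scores = proposals[order], scores[order]
    keep = np.asarray(nms(proposals, scores, nms_threshold))[:post_nms_count]   # steps 6-7
    out_rois = np.zeros((post_nms_count, 4), dtype=proposals.dtype)
    out_scores = np.zeros(post_nms_count, dtype=scores.dtype)
    out_rois[:len(keep)] = proposals[keep]
    out_scores[:len(keep)] = scores[keep]
    return out_rois, out_scores   # shapes [post_nms_count, 4] and [post_nms_count]
```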
**Attributes**:
* *min_size*
- * **Description**: *min_size* attribute specifies minimum box width and height.
+ * **Description**: The *min_size* attribute specifies minimum box width and height.
* **Range of values**: non-negative floating point number
* **Type**: float
* **Default value**: None
@@ -31,7 +30,7 @@ is less than *post_nms_count* then operation returns output tensors filled by ze
* *nms_threshold*
- * **Description**: *nms_threshold* attribute specifies threshold to be used in the NMS stage.
+ * **Description**: The *nms_threshold* attribute specifies threshold to be used in the NMS stage.
* **Range of values**: non-negative floating point number
* **Type**: float
* **Default value**: None
@@ -39,7 +38,7 @@ is less than *post_nms_count* then operation returns output tensors filled by ze
* *pre_nms_count*
- * **Description**: *pre_nms_count* attribute specifies number of top-n proposals before NMS.
+ * **Description**: The *pre_nms_count* attribute specifies number of top-n proposals before NMS.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: None
@@ -47,7 +46,7 @@ is less than *post_nms_count* then operation returns output tensors filled by ze
* *post_nms_count*
- * **Description**: *post_nms_count* attribute specifies number of top-n proposals after NMS.
+ * **Description**: The *post_nms_count* attribute specifies number of top-n proposals after NMS.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: None
@@ -55,22 +54,22 @@ is less than *post_nms_count* then operation returns output tensors filled by ze
**Inputs**
-* **1**: A 1D tensor of type *T* with 3 elements `[image_height, image_width, scale_height_and_width]` describing input
+* **1**: A 1D tensor of type *T* with 3 elements `[image_height, image_width, scale_height_and_width]` providing input
image size info. **Required.**
-* **2**: A 2D tensor of type *T* with shape `[height * width * number_of_channels, 4]` describing anchors. **Required.**
+* **2**: A 2D tensor of type *T* with shape `[height * width * number_of_channels, 4]` providing anchors. **Required.**
-* **3**: A 3D tensor of type *T* with shape `[number_of_channels * 4, height, width]` describing deltas for anchors.
+* **3**: A 3D tensor of type *T* with shape `[number_of_channels * 4, height, width]` providing deltas for anchors.
Height and width for third and fourth inputs should be equal. **Required.**
-* **4**: A 3D tensor of type *T* with shape `[number_of_channels, height, width]` describing proposals scores.
+* **4**: A 3D tensor of type *T* with shape `[number_of_channels, height, width]` providing proposals scores.
**Required.**
**Outputs**
-* **1**: A 2D tensor of type *T* with shape `[post_nms_count, 4]` describing ROIs.
+* **1**: A 2D tensor of type *T* with shape `[post_nms_count, 4]` providing ROIs.
-* **2**: A 1D tensor of type *T* with shape `[post_nms_count]` describing ROIs scores.
+* **2**: A 1D tensor of type *T* with shape `[post_nms_count]` providing ROIs scores.
**Types**
diff --git a/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md b/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md
index 5a474c7efc782e..bcf09c46c890ea 100644
--- a/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md
+++ b/docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md
@@ -4,34 +4,33 @@
**Category**: Object detection
-**Short description**: An operation *ExperimentalDetectronPriorGridGenerator* generates prior grids of
-specified sizes.
+**Short description**: The *ExperimentalDetectronPriorGridGenerator* operation generates prior grids of specified sizes.
-**Detailed description**: Operation takes coordinates of centres of boxes and add strides with offset `0.5` to them to
+**Detailed description**: The operation takes coordinates of centres of boxes and adds strides with offset `0.5` to them to
calculate coordinates of prior grids.
-Numbers of generated cells is `featmap_height` and `featmap_width` if *h* and *w* are zeroes, otherwise *h* and *w*
+The numbers of generated cells are `featmap_height` and `featmap_width` if *h* and *w* are zeroes; otherwise, *h* and *w*,
respectively. Steps of generated grid are `image_height` / `layer_height` and `image_width` / `layer_width` if
-*stride_h* and *stride_w* are zeroes, otherwise *stride_h* and *stride_w* respectively.
+*stride_h* and *stride_w* are zeroes; otherwise, *stride_h* and *stride_w*, respectively.
`featmap_height`, `featmap_width`, `image_height` and `image_width` are spatial dimensions values from second and third
-inputs respectively.
+inputs, respectively.
**Attributes**:
* *flatten*
- * **Description**: *flatten* attribute specifies whether the output tensor should be 2D or 4D.
+ * **Description**: The *flatten* attribute specifies whether the output tensor should be 2D or 4D.
* **Range of values**:
- * `true` - the output tensor should be 2D tensor
- * `false` - the output tensor should be 4D tensor
+ * `true` - the output tensor should be a 2D tensor
+ * `false` - the output tensor should be a 4D tensor
* **Type**: boolean
* **Default value**: true
* **Required**: *no*
* *h*
- * **Description**: *h* attribute specifies number of cells of the generated grid with respect to height.
+ * **Description**: The *h* attribute specifies number of cells of the generated grid with respect to height.
* **Range of values**: non-negative integer number less or equal than `featmap_height`
* **Type**: int
* **Default value**: 0
@@ -39,7 +38,7 @@ inputs respectively.
* *w*
- * **Description**: *w* attribute specifies number of cells of the generated grid with respect to width.
+ * **Description**: The *w* attribute specifies number of cells of the generated grid with respect to width.
* **Range of values**: non-negative integer number less or equal than `featmap_width`
* **Type**: int
* **Default value**: 0
@@ -47,7 +46,7 @@ inputs respectively.
* *stride_x*
- * **Description**: *stride_x* attribute specifies the step of generated grid with respect to x coordinate.
+ * **Description**: The *stride_x* attribute specifies the step of generated grid with respect to x coordinate.
* **Range of values**: non-negative float number
* **Type**: float
* **Default value**: 0.0
@@ -55,7 +54,7 @@ inputs respectively.
* *stride_y*
- * **Description**: *stride_y* attribute specifies the step of generated grid with respect to y coordinate.
+ * **Description**: The *stride_y* attribute specifies the step of generated grid with respect to y coordinate.
* **Range of values**: non-negative float number
* **Type**: float
* **Default value**: 0.0
@@ -75,8 +74,8 @@ not its data. **Required.**
**Outputs**
* **1**: A tensor of type *T* with priors grid with shape `[featmap_height * featmap_width * number_of_priors, 4]`
-if flatten is `true` or `[featmap_height, featmap_width, number_of_priors, 4]` otherwise.
-In case then 0 < *h* < `featmap_height` and/or 0 < *w* < `featmap_width` the output data size is less than
+if *flatten* is `true`, or `[featmap_height, featmap_width, number_of_priors, 4]` otherwise.
+If 0 < *h* < `featmap_height` and/or 0 < *w* < `featmap_width`, the output data size is less than
`featmap_height` * `featmap_width` * `number_of_priors` * 4 and the output tensor is filled with undefined values for
rest output tensor elements.
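For the `flatten = true` case, a small NumPy sketch of the grid construction described above (the first input is assumed to hold the `[number_of_priors, 4]` prior boxes that get shifted onto each cell centre; the helper name is illustrative only):

```python
import numpy as np

def prior_grid(priors, grid_h, grid_w, step_y, step_x):
    cells = []
    for h in range(grid_h):
        for w in range(grid_w):
            # cell centre = stride with an offset of 0.5 along each axis
            shift = np.array([(w + 0.5) * step_x, (h + 0.5) * step_y,
                              (w + 0.5) * step_x, (h + 0.5) * step_y])
            cells.append(priors + shift)
    return np.concatenate(cells, axis=0)   # [grid_h * grid_w * number_of_priors, 4]
```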
diff --git a/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md b/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md
index d4e93a188fc204..407c4301dc4b7c 100644
--- a/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md
+++ b/docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md
@@ -30,7 +30,7 @@ For more details please see the following source:
* *output_size*
- * **Description**: *output_size* attribute specifies the width and height of the output tensor.
+ * **Description**: The *output_size* attribute specifies the width and height of the output tensor.
* **Range of values**: a positive integer number
* **Type**: int
* **Default value**: None
@@ -38,7 +38,7 @@ For more details please see the following source:
* *sampling_ratio*
- * **Description**: *sampling_ratio* attribute specifies the number of sampling points per the output value. If 0,
+ * **Description**: The *sampling_ratio* attribute specifies the number of sampling points per output value. If 0,
then use adaptive number computed as `ceil(roi_width / output_width)`, and likewise for height.
* **Range of values**: a non-negative integer number
* **Type**: int
@@ -47,7 +47,7 @@ For more details please see the following source:
* *pyramid_scales*
- * **Description**: *pyramid_scales* enlists `image_size / layer_size[l]` ratios for pyramid layers `l=1,...,L`,
+ * **Description**: The *pyramid_scales* attribute lists `image_size / layer_size[l]` ratios for pyramid layers `l=1,...,L`,
where `L` is the number of pyramid layers, and `image_size` refers to network's input image. Note that pyramid's
largest layer may have smaller size than input image, e.g. `image_size` is `800 x 1344` in the XML example below.
* **Range of values**: a list of positive integer numbers
@@ -57,7 +57,7 @@ For more details please see the following source:
* *aligned*
- * **Description**: *aligned* attribute specifies add offset (`-0.5`) to ROIs sizes or not.
+ * **Description**: The *aligned* attribute specifies whether to add an offset (`-0.5`) to ROIs sizes.
* **Range of values**:
* `true` - add offset to ROIs sizes
* `false` - do not add offset to ROIs sizes
@@ -67,7 +67,7 @@ For more details please see the following source:
**Inputs**:
-* **1**: 2D input tensor of type *T* with shape `[number_of_ROIs, 4]` describing the ROIs as 4-tuples:
+* **1**: 2D input tensor of type *T* with shape `[number_of_ROIs, 4]` providing the ROIs as 4-tuples:
[x1, y1, x2, y2]. Coordinates *x* and *y* are refer to the network's input
*image_size*. **Required**.
diff --git a/docs/ops/movement/Pad_1.md b/docs/ops/movement/Pad_1.md
index 79684706675d9a..853d94eac522d1 100644
--- a/docs/ops/movement/Pad_1.md
+++ b/docs/ops/movement/Pad_1.md
@@ -153,8 +153,7 @@ OUTPUT =
83748
-
-```
\ No newline at end of file
+```
diff --git a/docs/ops/movement/ScatterNDUpdate_3.md b/docs/ops/movement/ScatterNDUpdate_3.md
index 93398fa3f98dbc..5dd1ed9a462957 100644
--- a/docs/ops/movement/ScatterNDUpdate_3.md
+++ b/docs/ops/movement/ScatterNDUpdate_3.md
@@ -48,7 +48,7 @@ output = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
* **2**: `indices` tensor with indices of arbitrary rank `q` >= 1 and of type *T_IND*. All index values `i_j` in index entry `(i_0, i_1, ...,i_k)` (where `k = indices.shape[-1]`) must be within bounds `[0, s_j - 1]` where `s_j = data.shape[j]`. `k` must be at most `r`. Required.
-* **3**: `updates` tensor of rank `r - indices.shape[-1] + q - 1` of type *T*. Required.
+* **3**: `updates` tensor of rank `r - indices.shape[-1] + q - 1` of type *T*. If the expected `updates` rank is 0, it can be passed as a tensor with a single element. Required.
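A NumPy sketch of the update rule, including the single-element case mentioned above (the helper name is illustrative only):

```python
import numpy as np

def scatter_nd_update(data, indices, updates):
    output = data.copy()
    k = indices.shape[-1]                                   # k = indices.shape[-1] <= r
    index_list = indices.reshape(-1, k)
    update_slices = np.asarray(updates).reshape(len(index_list), *data.shape[k:])
    for idx, upd in zip(index_list, update_slices):
        output[tuple(idx)] = upd                            # each index tuple selects a slice of data
    return output

data = np.zeros(8, dtype=np.int64)
print(scatter_nd_update(data, np.array([[4], [3], [1], [7]]), np.array([9, 10, 11, 12])))
# -> [ 0 11  0 10  9  0  0 12]
```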
**Outputs**:
diff --git a/docs/ops/opset1.md b/docs/ops/opset1.md
index 73da245d2dc541..eec109ad9c3877 100644
--- a/docs/ops/opset1.md
+++ b/docs/ops/opset1.md
@@ -93,7 +93,7 @@ declared in `namespace opset1`.
* [Result](infrastructure/Result_1.md)
* [ReverseSequence](movement/ReverseSequence_1.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
* [Sign](arithmetic/Sign_1.md)
diff --git a/docs/ops/opset2.md b/docs/ops/opset2.md
index bfee6cee9c45a8..67c51385a78a80 100644
--- a/docs/ops/opset2.md
+++ b/docs/ops/opset2.md
@@ -98,7 +98,7 @@ declared in `namespace opset2`.
* [ReverseSequence](movement/ReverseSequence_1.md)
* [ROIPooling](detection/ROIPooling_1.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
* [Sign](arithmetic/Sign_1.md)
diff --git a/docs/ops/opset3.md b/docs/ops/opset3.md
index e36d4be27c5227..52af67efc0f0de 100644
--- a/docs/ops/opset3.md
+++ b/docs/ops/opset3.md
@@ -113,7 +113,7 @@ declared in `namespace opset3`.
* [ScatterElementsUpdate](movement/ScatterElementsUpdate_3.md)
* [ScatterUpdate](movement/ScatterUpdate_3.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_3.md)
* [ShuffleChannels](movement/ShuffleChannels_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
diff --git a/docs/ops/opset4.md b/docs/ops/opset4.md
index 709319f0640d16..71607453ee983c 100644
--- a/docs/ops/opset4.md
+++ b/docs/ops/opset4.md
@@ -121,7 +121,7 @@ declared in `namespace opset4`.
* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md)
* [ScatterUpdate](movement/ScatterUpdate_3.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_3.md)
* [ShuffleChannels](movement/ShuffleChannels_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
diff --git a/docs/ops/opset5.md b/docs/ops/opset5.md
index 7db25f894d5d32..6c79caca19b0a4 100644
--- a/docs/ops/opset5.md
+++ b/docs/ops/opset5.md
@@ -129,7 +129,7 @@ declared in `namespace opset5`.
* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md)
* [ScatterUpdate](movement/ScatterUpdate_3.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_3.md)
* [ShuffleChannels](movement/ShuffleChannels_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
diff --git a/docs/ops/opset6.md b/docs/ops/opset6.md
index dbe17d468611d2..92deb4a6fbea3d 100644
--- a/docs/ops/opset6.md
+++ b/docs/ops/opset6.md
@@ -135,7 +135,7 @@ declared in `namespace opset6`.
* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md)
* [ScatterUpdate](movement/ScatterUpdate_3.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_3.md)
* [ShuffleChannels](movement/ShuffleChannels_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
diff --git a/docs/ops/opset7.md b/docs/ops/opset7.md
index 242b1e029cf887..c04b90e81a0391 100644
--- a/docs/ops/opset7.md
+++ b/docs/ops/opset7.md
@@ -138,7 +138,7 @@ declared in `namespace opset7`.
* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md)
* [ScatterUpdate](movement/ScatterUpdate_3.md)
* [Select](condition/Select_1.md)
-* [Selu](arithmetic/Selu_1.md)
+* [Selu](activation/Selu_1.md)
* [ShapeOf](shape/ShapeOf_3.md)
* [ShuffleChannels](movement/ShuffleChannels_1.md)
* [Sigmoid](activation/Sigmoid_1.md)
diff --git a/docs/ops/shape/Squeeze_1.md b/docs/ops/shape/Squeeze_1.md
index 9dff893cd8a419..4510748ca17551 100644
--- a/docs/ops/shape/Squeeze_1.md
+++ b/docs/ops/shape/Squeeze_1.md
@@ -4,15 +4,19 @@
**Category**: Shape manipulation
-**Short description**: *Squeeze* removes specified dimensions (second input) equal to 1 of the first input tensor. If the second input is omitted then all dimensions equal to 1 are removed. If the specified dimension is not equal to one then error is raised.
+**Short description**: *Squeeze* removes dimensions equal to 1 from the first input tensor.
+
+**Detailed description**: *Squeeze* can be used with or without the second input tensor.
+* If only the first input is provided, every dimension that is equal to 1 will be removed from it.
+* With the second input provided, each value is an index of a dimension of the first tensor that is to be removed. The specified dimension has to be equal to 1; otherwise, an error is raised. Dimension indices can be specified directly or by negative values (counting dimensions from the end).
**Attributes**: *Squeeze* operation doesn't have attributes.
**Inputs**:
-* **1**: Multidimensional input tensor of type *T*. *Required*.
+* **1**: Multidimensional input tensor of type *T*. **Required**.
-* **2**: 0D or 1D tensor of type *T_SHAPE* with dimensions indices to squeeze. Values could be negative. *Optional*.
+* **2**: Scalar or 1D tensor of type *T_INT* with indices of dimensions to squeeze. Values could be negative (have to be from range `[-R, R-1]`, where `R` is the rank of the first input). **Optional**.
**Outputs**:
@@ -20,13 +24,13 @@
**Types**
-* *T*: supported type.
+* *T*: any numeric type.
-* *T_SHAPE*: supported integer type.
+* *T_INT*: any supported integer type.
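A NumPy analogue of the two modes described above, using `np.squeeze` as a stand-in for the operation's semantics:

```python
import numpy as np

data = np.ones((1, 3, 1, 2))
print(np.squeeze(data).shape)                 # no second input: all 1-sized dims removed -> (3, 2)
print(np.squeeze(data, axis=(0, -2)).shape)   # explicit indices, negative values allowed -> (3, 2)
```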
**Example**
-*Example 1:*
+*Example 1: squeeze 4D tensor to a 2D tensor*
```xml
diff --git a/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md b/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md
index a378d3b378d315..6b496fbc93a0b4 100644
--- a/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md
+++ b/docs/ops/sort/ExperimentalDetectronTopKROIs_6.md
@@ -4,19 +4,19 @@
**Category**: Sort
-**Short description**: An operation *ExperimentalDetectronTopKROIs* is TopK operation applied to probabilities of input
+**Short description**: The *ExperimentalDetectronTopKROIs* operation is a TopK operation applied to probabilities of input
ROIs.
-**Detailed description**: Operation performs probabilities descending sorting for input ROIs and returns *max_rois*
-number of ROIs. Order of sorted ROIs with equal probabilities is undefined. If number of ROIs is less than *max_rois*
-then operation returns all ROIs descended sorted and the output tensor is filled with undefined values for rest output
-tensor elements.
+**Detailed description**: The operation sorts input ROIs by probability in descending order and returns the top *max_rois*
+of them. The order of sorted ROIs with equal probabilities is undefined. If the number of ROIs is less than *max_rois*,
+the operation returns all ROIs sorted in descending order, and the output tensor is filled with undefined values for the
+rest of the output tensor elements.
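A minimal NumPy sketch of the selection rule above (the undefined tail of the output is represented with zeros purely for illustration; the helper name is hypothetical):

```python
import numpy as np

def top_k_rois(rois, probs, max_rois):
    order = np.argsort(-probs)[:max_rois]         # descending by probability
    out = np.zeros((max_rois, 4), dtype=rois.dtype)
    out[:len(order)] = rois[order]                # tail stays undefined (zeros here)
    return out                                    # [max_rois, 4]
```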
**Attributes**:
* *max_rois*
- * **Description**: *max_rois* attribute specifies maximal numbers of output ROIs.
+ * **Description**: The *max_rois* attribute specifies the maximal number of output ROIs.
* **Range of values**: non-negative integer number
* **Type**: int
* **Default value**: 0
diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp
index 50e1d828f73f20..ca3dbbdacfed52 100644
--- a/docs/template_plugin/src/template_plugin.cpp
+++ b/docs/template_plugin/src/template_plugin.cpp
@@ -118,14 +118,13 @@ InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const
// ! [plugin:load_exe_network_impl]
// ! [plugin:import_network_impl]
-InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model, const std::map& config) {
+InferenceEngine::ExecutableNetworkInternal::Ptr
+Plugin::ImportNetworkImpl(std::istream& model, const std::map& config) {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetworkImpl");
Configuration cfg(config);
- auto exec_network_impl = std::make_shared(model, cfg,
+ return std::make_shared(model, cfg,
std::static_pointer_cast(shared_from_this()));
-
- return make_executable_network(exec_network_impl);
}
// ! [plugin:import_network_impl]
diff --git a/docs/template_plugin/src/template_plugin.hpp b/docs/template_plugin/src/template_plugin.hpp
index fd520767444b0c..10b68d7af42f10 100644
--- a/docs/template_plugin/src/template_plugin.hpp
+++ b/docs/template_plugin/src/template_plugin.hpp
@@ -30,7 +30,7 @@ class Plugin : public InferenceEngine::InferencePluginInternal {
void AddExtension(InferenceEngine::IExtensionPtr extension) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map & options) const override;
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map & options) const override;
- InferenceEngine::ExecutableNetwork ImportNetworkImpl(std::istream& model, const std::map& config) override;
+ InferenceEngine::ExecutableNetworkInternal::Ptr ImportNetworkImpl(std::istream& model, const std::map& config) override;
private:
friend class ExecutableNetwork;
diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake
index 7c6428b669eae9..8dd7d3e164d1ad 100644
--- a/inference-engine/cmake/dependencies.cmake
+++ b/inference-engine/cmake/dependencies.cmake
@@ -186,9 +186,9 @@ endif ()
if (ENABLE_OPENCV)
reset_deps_cache(OpenCV_DIR)
- set(OPENCV_VERSION "4.5.1")
- set(OPENCV_BUILD "044")
- set(OPENCV_BUILD_YOCTO "337")
+ set(OPENCV_VERSION "4.5.2")
+ set(OPENCV_BUILD "076")
+ set(OPENCV_BUILD_YOCTO "708")
if (AARCH64)
if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
@@ -208,7 +208,7 @@ if (ENABLE_OPENCV)
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/opencv"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
- SHA256 "b5239e0e50b9009f95a29cb11f0840ec085fa07f6c4d3349adf090f1e51b0787")
+ SHA256 "ee3e5255f381b8de5e6fffe4e43dae8c99035377d0380f9183bd7341f1d0f204")
unset(IE_PATH_TO_DEPS)
endif()
@@ -219,37 +219,37 @@ if (ENABLE_OPENCV)
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}/opencv"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
- SHA256 "5250bfe5860c15eb1b31963c78804ee9b301a19d8d6e920c06ef41de681cb99e")
+ SHA256 "a14f872e6b63b6ac12c7ff47fa49e578d14c14433b57f5d85ab5dd48a079938c")
elseif(APPLE AND X86_64)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_MAC "opencv/opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.txz"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx/opencv"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
- SHA256 "f3ebc5cc72c86106c30cc711ac689e02281556bb43c09a89cd45cb99b6bef9a8")
+ SHA256 "3e162f96e86cba8836618134831d9cf76df0438778b3e27e261dedad9254c514")
elseif(LINUX)
if (AARCH64)
set(OPENCV_SUFFIX "yocto_kmb")
set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}")
elseif (ARM)
set(OPENCV_SUFFIX "debian9arm")
- set(OPENCV_HASH "0e787d6738092993bc92bb55975f52caabae45dc73473b5196d15e65e87d6b9d")
+ set(OPENCV_HASH "4274f8c40b17215f4049096b524e4a330519f3e76813c5a3639b69c48633d34e")
elseif ((LINUX_OS_NAME STREQUAL "CentOS 7" OR
CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") AND X86_64)
set(OPENCV_SUFFIX "centos7")
- set(OPENCV_HASH "9b813af064d463b31fa1603b11b6559532a031d59bb0782d234380955fd397e0")
+ set(OPENCV_HASH "5fa76985c84fe7c64531682ef0b272510c51ac0d0565622514edf1c88b33404a")
elseif (LINUX_OS_NAME MATCHES "CentOS 8" AND X86_64)
set(OPENCV_SUFFIX "centos8")
- set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83")
+ set(OPENCV_HASH "db087dfd412eedb8161636ec083ada85ff278109948d1d62a06b0f52e1f04202")
elseif (LINUX_OS_NAME STREQUAL "Ubuntu 16.04" AND X86_64)
set(OPENCV_SUFFIX "ubuntu16")
set(OPENCV_HASH "cd46831b4d8d1c0891d8d22ff5b2670d0a465a8a8285243059659a50ceeae2c3")
elseif (LINUX_OS_NAME STREQUAL "Ubuntu 18.04" AND X86_64)
set(OPENCV_SUFFIX "ubuntu18")
- set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83")
+ set(OPENCV_HASH "db087dfd412eedb8161636ec083ada85ff278109948d1d62a06b0f52e1f04202")
elseif ((LINUX_OS_NAME STREQUAL "Ubuntu 20.04" OR LINUX_OS_NAME STREQUAL "LinuxMint 20.1") AND X86_64)
set(OPENCV_SUFFIX "ubuntu20")
- set(OPENCV_HASH "2b7808d002864acdc5fc0b19cd30dadc31a37cc267931cad605f23f2383bfc21")
+ set(OPENCV_HASH "2fe7bbc40e1186eb8d099822038cae2821abf617ac7a16fadf98f377c723e268")
elseif(NOT DEFINED OpenCV_DIR AND NOT DEFINED ENV{OpenCV_DIR})
message(FATAL_ERROR "OpenCV is not available on current platform (${LINUX_OS_NAME})")
endif()
diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake
index 86add651af1970..f01d9b4e23fde3 100644
--- a/inference-engine/cmake/vpu_dependencies.cmake
+++ b/inference-engine/cmake/vpu_dependencies.cmake
@@ -6,14 +6,14 @@ include_guard(GLOBAL)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
- "d0f6aaaf71a595963e6013ef59045e20b07324f1a47deaa3f906419d39b2bd5a"
- "18d3cd10cf6cc36ff58001812d3d215c0bbb2de09a8832128592401c8f959358")
+ "11a6db07d3a17c9c0fc4247fce47c942e0dcd59f8d70665a96bae0d7b7121fe9"
+ "43f3dc0f0a8114ca34226167970aafdc869600929d6e3761c1eaa6eec71f2237")
#
# Default packages
#
-set(FIRMWARE_PACKAGE_VERSION 1642)
+set(FIRMWARE_PACKAGE_VERSION 1658)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
#
diff --git a/inference-engine/ie_bridges/c/samples/hello_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_classification/README.md
index 6bf0ddf0b6369b..b090b648f2716d 100644
--- a/inference-engine/ie_bridges/c/samples/hello_classification/README.md
+++ b/inference-engine/ie_bridges/c/samples/hello_classification/README.md
@@ -1,31 +1,104 @@
# Hello Classification C Sample {#openvino_inference_engine_ie_bridges_c_samples_hello_classification_README}
-This topic describes how to run the Hello Classification C sample application.
+Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API and input auto-resize feature.
-It demonstrates how to use the following Inference Engine C API in applications:
-* Synchronous Infer Request API
-* Input auto-resize API. It allows to set image of the original size as input for a network with other input size.
- Resize will be performed automatically by the corresponding plugin just before inference.
+The Hello Classification C sample application demonstrates how to use the following Inference Engine C API in applications:
-There is also an API introduced to crop a ROI object and set it as input without additional memory re-allocation.
-To properly demonstrate this API, it is required to run several networks in pipeline which is out of scope of this sample.
+| Feature | API | Description |
+|:--- |:--- |:---
+| Basic Infer Flow | [ie_core_create], [ie_core_read_network], [ie_core_load_network], [ie_exec_network_create_infer_request], [ie_infer_request_set_blob], [ie_infer_request_get_blob] | Common API to do inference: configure input and output blobs, load a model, create an infer request
+| Synchronous Infer | [ie_infer_request_infer] | Do synchronous inference
+| Network Operations | [ie_network_get_input_name], [ie_network_get_inputs_number], [ie_network_get_outputs_number], [ie_network_set_input_precision], [ie_network_get_output_name], [ie_network_get_output_precision] | Manage the network
+| Blob Operations| [ie_blob_make_memory_from_preallocated], [ie_blob_get_dims], [ie_blob_get_cbuffer] | Work with memory container for storing inputs, outputs of the network, weights and biases of the layers
+| Input auto-resize | [ie_network_set_input_resize_algorithm], [ie_network_set_input_layout] | Set an image of the original size as input for a network with a different input size. Resize and layout conversions are performed automatically by the corresponding plugin just before inference
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+| Options | Values |
+|:--- |:---
+| Validated Models | AlexNet and GoogLeNet (image classification networks)
+| Model Format | Inference Engine Intermediate Representation (.xml + .bin), ONNX (.onnx)
+| Validated images | The sample uses OpenCV\* to [read input image](https://docs.opencv.org/master/d4/da8/group__imgcodecs.html#ga288b8b3da0892bd651fce07b3bbd3a56) (\*.bmp, \*.png)
+| Supported devices | [All](../../../../../docs/IE_DG/supported_plugins/Supported_Devices.md) |
+| Other language realization | [C++](../../../../samples/hello_classification/README.md), [Python](../../../python/sample/hello_classification/README.md) |
+
+## How It Works
+
+Upon start-up, the sample application reads command-line parameters and loads the specified network and an image to the Inference Engine plugin.
+Then, the sample creates a synchronous inference request object. When inference is done, the application outputs data to the standard output stream.
+
+You can see the explicit description of
+each sample step in the [Integration Steps](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) section of the "Integrate the Inference Engine with Your Application" guide.
+
+## Building
+
+To build the sample, use the instructions available in the [Build the Sample Applications](../../../../../docs/IE_DG/Samples_Overview.md) section of the Inference Engine Samples guide.
## Running
-To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
+To run the sample, you need to specify a model and an image:
+- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
+- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+> **NOTES**:
+>
+> - By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
>
-> The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
+> - Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+>
+> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
You can do inference of an image using a trained AlexNet network on a GPU using the following command:
```sh
-./hello_classification_c /alexnet_fp32.xml /cat.bmp GPU
+./hello_classification_c /alexnet_fp32.xml /cat.png GPU
```
## Sample Output
The application outputs top-10 inference results.
+
+```sh
+Top 10 results:
+
+Image /opt/intel/openvino/deployment_tools/demo/car.png
+
+classid probability
+------- -----------
+479 0.7562205
+511 0.0760381
+436 0.0724111
+817 0.0462140
+656 0.0301231
+661 0.0056171
+581 0.0031622
+468 0.0029917
+717 0.0023081
+627 0.0016193
+
+This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
+```
+
+## See Also
+
+- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
+- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
+- [Model Downloader](@ref omz_tools_downloader_README)
+- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+
+[ie_core_create]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gaab73c7ee3704c742eaac457636259541
+[ie_core_read_network]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gaa40803295255b3926a3d1b8924f26c29
+[ie_network_get_input_name]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga36b0c28dfab6db2bfcc2941fd57fbf6d
+[ie_network_set_input_precision]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#gadd99b7cc98b3c33daa2095b8a29f66d7
+[ie_network_get_output_name]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga1feabc49576db24d9821a150b2b50a6c
+[ie_network_get_output_precision]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#gaeaa7f1fb8f56956fc492cd9207235984
+[ie_core_load_network]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#ga318d4b0214b8a3fd33f9e44170befcc5
+[ie_exec_network_create_infer_request]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__ExecutableNetwork.html#gae72247391c1429a18c367594a4b7db9f
+[ie_blob_make_memory_from_preallocated]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Blob.html#ga7a874d46375e10fa1a7e8e3d7e1c9c9c
+[ie_infer_request_set_blob]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#ga891c2d475501bba761148a0c3faca196
+[ie_infer_request_infer]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#gac6c6fcb67ccb4d0ec9ad1c63a5bee7b6
+[ie_infer_request_get_blob]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#ga6cd04044ea95987260037bfe17ce1a2d
+[ie_blob_get_dims]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Blob.html#ga25d93efd7ec1052a8896ac61cc14c30a
+[ie_blob_get_cbuffer]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Blob.html#gaf6b4a110b4c5723dcbde135328b3620a
+[ie_network_set_input_resize_algorithm]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga46ab3b3a06359f2b77f58bdd6e8a5492
+[ie_network_set_input_layout]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga27ea9f92290e0b2cdedbe8a85feb4c01
+[ie_network_get_inputs_number]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga6a3349bca66c4ba8b41a434061fccf52
+[ie_network_get_outputs_number]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga869b8c309797f1e09f73ddffd1b57509
diff --git a/inference-engine/ie_bridges/c/samples/hello_classification/main.c b/inference-engine/ie_bridges/c/samples/hello_classification/main.c
index e17107f8213e92..86d8125a1b0cad 100644
--- a/inference-engine/ie_bridges/c/samples/hello_classification/main.c
+++ b/inference-engine/ie_bridges/c/samples/hello_classification/main.c
@@ -2,17 +2,28 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include
+#include
#include
+#include
#include
-#include
+
#include
+#include
+/**
+* @brief Struct to store classification results
+*/
struct classify_res {
size_t class_id;
float probability;
};
+/**
+* @brief Sort result of image classification by probability
+* @param res struct with classification results to sort
+* @param n size of the struct
+* @return none
+*/
void classify_res_sort(struct classify_res *res, size_t n) {
size_t i, j;
for (i = 0; i < n; ++i) {
@@ -30,6 +41,12 @@ void classify_res_sort(struct classify_res *res, size_t n) {
}
}
+/**
+* @brief Convert output blob to classify struct for processing results
+* @param blob blob with output data
+* @param n size of the blob
+* @return struct classify_res
+*/
struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
dimensions_t output_dim;
IEStatusCode status = ie_blob_get_dims(blob, &output_dim);
@@ -60,6 +77,13 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
return cls;
}
+/**
+* @brief Print results of classification
+* @param cls struct of the classification results
+* @param n size of the struct of classification results
+* @param img_path string with the image path
+* @return none
+*/
void print_classify_res(struct classify_res *cls, size_t n, const char *img_path) {
printf("\nImage %s\n", img_path);
printf("\nclassid probability\n");
@@ -68,6 +92,7 @@ void print_classify_res(struct classify_res *cls, size_t n, const char *img_path
for (i = 0; i < n; ++i) {
printf("%zu %f\n", cls[i].class_id, cls[i].probability);
}
+ printf("\nThis sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n");
}
int main(int argc, char **argv) {
@@ -86,22 +111,36 @@ int main(int argc, char **argv) {
ie_infer_request_t *infer_request = NULL;
char *input_name = NULL, *output_name = NULL;
ie_blob_t *imgBlob = NULL, *output_blob = NULL;
+ size_t network_input_size;
+ size_t network_output_size;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 1. Load inference engine instance -------------------------------------
+ // --------------------------- Step 1. Initialize inference engine core -------------------------------------
IEStatusCode status = ie_core_create("", &core);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
+ // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
status = ie_core_read_network(core, input_model, NULL, &network);
if (status != OK)
goto err;
+ // check the network topology
+ status = ie_network_get_inputs_number(network, &network_input_size);
+ if (status != OK || network_input_size != 1) {
+ printf("Sample supports topologies with 1 input only\n");
+ goto err;
+ }
+
+ status = ie_network_get_outputs_number(network, &network_output_size);
+ if (status != OK || network_output_size != 1) {
+ printf("Sample supports topologies with 1 output only\n");
+ goto err;
+ }
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 3. Configure input & output ---------------------------------------------
+ // --------------------------- Step 3. Configure input & output ---------------------------------------------
// --------------------------- Prepare input blobs -----------------------------------------------------
status = ie_network_get_input_name(network, 0, &input_name);
@@ -124,20 +163,20 @@ int main(int argc, char **argv) {
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 4. Loading model to the device ------------------------------------------
+ // --------------------------- Step 4. Loading model to the device ------------------------------------------
ie_config_t config = {NULL, NULL, NULL};
status = ie_core_load_network(core, network, device_name, &config, &exe_network);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 5. Create infer request -------------------------------------------------
+ // --------------------------- Step 5. Create infer request -------------------------------------------------
status = ie_exec_network_create_infer_request(exe_network, &infer_request);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 6. Prepare input --------------------------------------------------------
+ // --------------------------- Step 6. Prepare input --------------------------------------------------------
/* Read input image to a blob and set it to an infer request without resize and layout conversions. */
c_mat_t img;
image_read(input_image_path, &img);
@@ -158,14 +197,14 @@ int main(int argc, char **argv) {
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 7. Do inference --------------------------------------------------------
+ // --------------------------- Step 7. Do inference --------------------------------------------------------
/* Running the request synchronously */
status = ie_infer_request_infer(infer_request);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 8. Process output ------------------------------------------------------
+ // --------------------------- Step 8. Process output ------------------------------------------------------
status = ie_infer_request_get_blob(infer_request, output_name, &output_blob);
if (status != OK) {
image_free(&img);
diff --git a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/CMakeLists.txt b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/CMakeLists.txt
index d0452b283558d9..ddffe4686a7cc3 100644
--- a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/CMakeLists.txt
+++ b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/CMakeLists.txt
@@ -3,5 +3,4 @@
#
ie_add_sample(NAME hello_nv12_input_classification_c
- SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.c"
- DEPENDENCIES opencv_c_wraper)
+ SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.c")
diff --git a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md
index a9e1e20056b049..ba7b58ad473ba7 100644
--- a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md
+++ b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md
@@ -1,51 +1,104 @@
# Hello NV12 Input Classification C Sample {#openvino_inference_engine_ie_bridges_c_samples_hello_nv12_input_classification_README}
-This topic describes how to run the Hello NV12 Input Classification sample application.
-The sample demonstrates how to use the new NV12 automatic input pre-processing API of the Inference Engine in your applications.
-Refer to [Integrate the Inference Engine New Request API with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+Inference of image classification networks like AlexNet with images in the NV12 color format using the Synchronous Inference Request API.
+
+Hello NV12 Input Classification C Sample demonstrates how to use the NV12 automatic input pre-processing API of the Inference Engine in your applications:
+
+| Feature | API | Description |
+|:--- |:--- |:---
+| Blob Operations| [ie_blob_make_memory_nv12] | Create an NV12 blob
+| Input in NV12 color format |[ie_network_set_color_format]| Change the color format of the input data
+Basic Inference Engine API is covered by [Hello Classification C sample](../hello_classification/README.md).
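+
+The snippet below is a minimal sketch of how these two calls fit together. It assumes `network`, `infer_request`, `input_name` and the Y/UV plane blobs (`y_blob`, `uv_blob`) already exist, and the exact signatures should be double-checked against the C API headers:
+
+```c
+// Tell the plugin that input data will arrive in the NV12 color format,
+// so the color conversion is performed automatically during inference.
+IEStatusCode status = ie_network_set_color_format(network, input_name, NV12);
+
+// Combine the previously created Y and UV plane blobs into a single NV12 blob
+// (call taken from the feature table above, signature assumed).
+ie_blob_t *nv12_blob = NULL;
+status |= ie_blob_make_memory_nv12(y_blob, uv_blob, &nv12_blob);
+
+// Bind the NV12 blob to the network input of the infer request.
+status |= ie_infer_request_set_blob(infer_request, input_name, nv12_blob);
+```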
+
+| Options | Values |
+|:--- |:---
+| Validated Models | AlexNet (image classification network)
+| Model Format | Inference Engine Intermediate Representation (\*.xml + \*.bin), ONNX (\*.onnx)
+| Validated images | An uncompressed image in the NV12 color format - \*.yuv
+| Supported devices | [All](../../../../../docs/IE_DG/supported_plugins/Supported_Devices.md) |
+| Other language realization | [C++](../../../../samples/hello_nv12_input_classification/README.md) |
## How It Works
-Upon the start-up, the sample application reads command-line parameters, loads a network and sets an
-image in the NV12 color format to an Inference Engine plugin. When inference is done, the
+Upon the start-up, the sample application reads command-line parameters, loads the specified network and an
+image in the NV12 color format to an Inference Engine plugin. Then, the sample creates a synchronous inference request object. When inference is done, the
application outputs data to the standard output stream.
+You can find an explicit description of
+each sample step in the [Integration Steps](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_Integrate_with_customer_application_new_API.html) section of the "Integrate the Inference Engine with Your Application" guide.
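+
+In terms of the C API calls that appear in `main.c`, the skeleton of this flow looks roughly as follows (error handling and the NV12-specific input setup are omitted; `input_model`, `device_name` and `output_name` are assumed to be set from the command line):
+
+```c
+ie_core_t *core = NULL;
+ie_network_t *network = NULL;
+ie_executable_network_t *exe_network = NULL;
+ie_infer_request_t *infer_request = NULL;
+ie_blob_t *output_blob = NULL;
+ie_config_t config = {NULL, NULL, NULL};
+
+// Step 1-2: create the core and read the model (IR or ONNX)
+ie_core_create("", &core);
+ie_core_read_network(core, input_model, NULL, &network);
+// Step 4-5: compile the model for the device and create an infer request
+ie_core_load_network(core, network, device_name, &config, &exe_network);
+ie_exec_network_create_infer_request(exe_network, &infer_request);
+// Step 7-8: run inference synchronously and fetch the output blob
+ie_infer_request_infer(infer_request);
+ie_infer_request_get_blob(infer_request, output_name, &output_blob);
+```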
+
+## Building
+
+To build the sample, please use the instructions available in the [Build the Sample Applications](../../../../../docs/IE_DG/Samples_Overview.md) section of the Inference Engine Samples guide.
+
+## Running
+
+To run the sample, you need to specify a model and an image:
+
+- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
+- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
+
The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to
convert your BGR/RGB image to NV12. To do this, you can use one of the widely available tools such
as FFmpeg\* or GStreamer\*. The following command shows how to convert an ordinary image into an
uncompressed NV12 image using FFmpeg:
+
```sh
ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv
```
-> **NOTE**:
+> **NOTES**:
>
-> * Because the sample reads raw image files, you should provide a correct image size along with the
+> - Because the sample reads raw image files, you should provide a correct image size along with the
> image path. The sample expects the logical size of the image, not the buffer size. For example,
> for 640x480 BGR/RGB image the corresponding NV12 logical image size is also 640x480, whereas the
> buffer size is 640x720.
-> * The sample uses input autoresize API of the Inference Engine to simplify user-side
-> pre-processing.
-> * By default, this sample expects that network input has BGR channels order. If you trained your
+> - By default, this sample expects that network input has BGR channels order. If you trained your
> model to work with RGB order, you need to reconvert your model using the Model Optimizer tool
> with `--reverse_input_channels` argument specified. For more information about the argument,
> refer to **When to Reverse Input Channels** section of
> [Converting a Model Using General Conversion Parameters](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
-
-## Running
-
-To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the
-> Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+> - Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
>
-> The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
+> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
+
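+As the first note above explains, the raw buffer the sample reads is larger than the logical image: NV12 stores a full-resolution Y plane plus a half-resolution interleaved UV plane, so the buffer size is `width * height * 3 / 2` bytes, exactly as computed in `main.c`:
+
+```c
+// For a 640x480 logical image this gives 640 * 720 = 460800 bytes.
+size_t input_width = 640, input_height = 480;
+size_t img_size = input_width * (input_height * 3 / 2);
+```
+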
+You can perform inference on an NV12 image using a trained AlexNet network on a CPU with the following command:
-You can perform inference on an NV12 image using a trained AlexNet network on CPU with the following command:
```sh
-./hello_nv12_input_classification_c /alexnet_fp32.xml /cat.yuv 640x480 CPU
+./hello_nv12_input_classification_c /alexnet_fp32.xml /cat.yuv 300x300 CPU
```
## Sample Output
The application outputs top-10 inference results.
+
+```sh
+Top 10 results:
+
+Image ./cat.yuv
+
+classid probability
+------- -----------
+435 0.091733
+876 0.081725
+999 0.069305
+587 0.043726
+666 0.038957
+419 0.032892
+285 0.030309
+700 0.029941
+696 0.021628
+855 0.020339
+
+This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
+```
+
+## See Also
+
+- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
+- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
+- [Model Downloader](@ref omz_tools_downloader_README)
+- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+
+[ie_network_set_color_format]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga85f3251f1f7b08507c297e73baa58969
+[ie_blob_make_memory_nv12]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Blob.html#ga0a2d97b0d40a53c01ead771f82ae7f4a
diff --git a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/main.c b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/main.c
index 55343c5b7ff07c..d5384a79bf6c71 100644
--- a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/main.c
+++ b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/main.c
@@ -2,16 +2,27 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include
+#include
#include
+#include
#include
+
#include
+/**
+* @brief Struct to store classification results
+*/
struct classify_res {
size_t class_id;
float probability;
};
+/**
+* @brief Sort result of image classification by probability
+* @param res array of classification results to sort
+* @param n number of elements in the array
+* @return none
+*/
void classify_res_sort(struct classify_res *res, size_t n) {
size_t i, j;
for (i = 0; i < n; ++i) {
@@ -29,6 +40,12 @@ void classify_res_sort(struct classify_res *res, size_t n) {
}
}
+/**
+* @brief Convert output blob to classify struct for processing results
+* @param blob output blob with the inference results
+* @param n pointer that receives the number of elements
+* @return pointer to an allocated array of struct classify_res
+*/
struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
dimensions_t output_dim;
IEStatusCode status = ie_blob_get_dims(blob, &output_dim);
@@ -59,6 +76,13 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
return cls;
}
+/**
+* @brief Print results of classification
+* @param cls array of classification results
+* @param n number of elements in the array
+* @param img_path path to the input image
+* @return none
+*/
void print_classify_res(struct classify_res *cls, size_t n, const char *img_path) {
printf("\nImage %s\n", img_path);
printf("\nclassid probability\n");
@@ -67,8 +91,16 @@ void print_classify_res(struct classify_res *cls, size_t n, const char *img_path
for (i = 0; i < n; ++i) {
printf("%zu %f\n", cls[i].class_id, cls[i].probability);
}
+ printf("\nThis sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n");
}
+/**
+* @brief Read image data
+* @param img_path path to the image file
+* @param img_data buffer that receives the image data
+* @param size number of bytes to read
+* @return total number of bytes successfully read; on error it differs from the size parameter
+*/
size_t read_image_from_file(const char *img_path, unsigned char *img_data, size_t size) {
FILE *fp = fopen(img_path, "rb+");
size_t read_size = 0;
@@ -84,7 +116,14 @@ size_t read_image_from_file(const char *img_path, unsigned char *img_data, size_
return read_size;
}
-size_t parse_image_size(const char *size_str, size_t *width, size_t *height) {
+/**
+* @brief Check that the image has a supported width and height
+* @param size_str image size in WIDTHxHEIGHT format
+* @param width pointer that receives the image width
+* @param height pointer that receives the image height
+* @return bool status True(success) or False(fail)
+*/
+bool is_supported_image_size(const char *size_str, size_t *width, size_t *height) {
const char *_size = size_str;
size_t _width = 0, _height = 0;
while (_size && *_size != 'x' && *_size != '\0') {
@@ -112,10 +151,10 @@ size_t parse_image_size(const char *size_str, size_t *width, size_t *height) {
if (_width % 2 == 0 && _height % 2 == 0) {
*width = _width;
*height = _height;
- return 0;
+ return true;
} else {
printf("Unsupported image size, width and height must be even numbers \n");
- return -1;
+ return false;
}
} else {
goto err;
@@ -123,7 +162,7 @@ size_t parse_image_size(const char *size_str, size_t *width, size_t *height) {
err:
printf("Incorrect format of image size parameter, expected WIDTHxHEIGHT, "
"actual: %s\n", size_str);
- return -1;
+ return false;
}
int main(int argc, char **argv) {
@@ -134,7 +173,7 @@ int main(int argc, char **argv) {
}
size_t input_width = 0, input_height = 0, img_size = 0;
- if (parse_image_size(argv[3], &input_width, &input_height) == -1)
+ if (!is_supported_image_size(argv[3], &input_width, &input_height))
return EXIT_FAILURE;
const char *input_model = argv[1];
@@ -149,28 +188,30 @@ int main(int argc, char **argv) {
ie_blob_t *y_blob = NULL, *uv_blob = NULL, *nv12_blob = NULL, *output_blob = NULL;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 1. Load inference engine instance -------------------------------------
+ // --------------------------- Step 1. Initialize inference engine core -------------------------------------
IEStatusCode status = ie_core_create("", &core);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
+ // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
status = ie_core_read_network(core, input_model, NULL, &network);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 3. Configure input & output ---------------------------------------------
+ // --------------------------- Step 3. Configure input & output ---------------------------------------------
// --------------------------- Prepare input blobs -----------------------------------------------------
status = ie_network_get_input_name(network, 0, &input_name);
if (status != OK)
goto err;
+ /* Mark input as resizable by setting a resize algorithm.
+ * In this case we will be able to set an input blob of any shape to an infer request.
+ * Resize and layout conversions are executed automatically during inference */
+ status |= ie_network_set_input_resize_algorithm(network, input_name, RESIZE_BILINEAR);
status |= ie_network_set_input_layout(network, input_name, NCHW);
status |= ie_network_set_input_precision(network, input_name, U8);
- // set input resize algorithm to enable input autoresize
- status |= ie_network_set_input_resize_algorithm(network, input_name, RESIZE_BILINEAR);
// set input color format to NV12 to enable automatic input color format pre-processing
status |= ie_network_set_color_format(network, input_name, NV12);
@@ -185,20 +226,20 @@ int main(int argc, char **argv) {
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 4. Loading model to the device ------------------------------------------
+ // --------------------------- Step 4. Loading model to the device ------------------------------------------
ie_config_t config = {NULL, NULL, NULL};
status = ie_core_load_network(core, network, device_name, &config, &exe_network);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 5. Create infer request -------------------------------------------------
+ // --------------------------- Step 5. Create infer request -------------------------------------------------
status = ie_exec_network_create_infer_request(exe_network, &infer_request);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 6. Prepare input --------------------------------------------------------
+ // --------------------------- Step 6. Prepare input --------------------------------------------------------
// read image with size converted to NV12 data size: height(NV12) = 3 / 2 * logical height
img_size = input_width * (input_height * 3 / 2);
img_data = (unsigned char *)calloc(img_size, sizeof(unsigned char));
@@ -230,14 +271,14 @@ int main(int argc, char **argv) {
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 7. Do inference --------------------------------------------------------
+ // --------------------------- Step 7. Do inference --------------------------------------------------------
/* Running the request synchronously */
status = ie_infer_request_infer(infer_request);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 8. Process output ------------------------------------------------------
+ // --------------------------- Step 8. Process output ------------------------------------------------------
status = ie_infer_request_get_blob(infer_request, output_name, &output_blob);
if (status != OK)
goto err;
diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md
index 55916a129f9473..e9736f3385dfb8 100644
--- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md
+++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md
@@ -1,21 +1,50 @@
# Object Detection C Sample SSD {#openvino_inference_engine_ie_bridges_c_samples_object_detection_sample_ssd_README}
-This topic demonstrates how to run the Object Detection C sample application, which does inference using object detection
-networks like SSD-VGG on Intel® Processors and Intel® HD Graphics.
+Inference of object detection networks like SSD-VGG using the Asynchronous Inference Request API and the [input reshape feature](../../../../../docs/IE_DG/ShapeInference.md).
-> **NOTE:** This topic describes usage of C implementation of the Object Detection Sample SSD. For the C++* implementation, refer to [Object Detection C++* Sample SSD](../../../../samples/object_detection_sample_ssd/README.md) and for the Python* implementation, refer to [Object Detection Python* Sample SSD](../../../python/sample/object_detection_sample_ssd/README.md).
+Object Detection C sample SSD application demonstrates how to use the following Inference Engine C API in applications:
+
+| Feature | API | Description |
+|:--- |:--- |:---
+|Asynchronous Infer | [ie_infer_request_infer_async], [ie_infer_request_wait] | Do asynchronous inference
+|Inference Engine Version| [ie_c_api_version] | Get Inference Engine API version
+|Available Devices| [ie_core_get_versions] | Get version information of the devices for inference
+|Custom Extension Kernels|[ie_core_add_extension] [ie_core_set_config]| Load extension library and config to the device
+|Network Operations|[ie_network_get_inputs_number] [ie_network_get_input_dims] [ie_network_get_input_shapes] [ie_network_get_outputs_number] [ie_network_get_output_dims]| Manage the network
+|Blob Operations|[ie_blob_get_buffer]| Work with memory container for storing inputs, outputs of the network, weights and biases of the layers
+|Input Reshape|[ie_network_reshape]| Set the batch size equal to the number of input images
+
+Basic Inference Engine API is covered by [Hello Classification C sample](../hello_classification/README.md).
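+
+As a rough sketch, the version- and extension-related entries from the table map to the following calls from `main.c` (the extension paths here are placeholders, and `core`/`device_name` are assumed to be set up as in the sample):
+
+```c
+// Print the Inference Engine API version
+ie_version_t version = ie_c_api_version();
+printf("%s\n", version.api_version);
+ie_version_free(&version);
+
+// Query version information of the target device
+ie_core_versions_t ver;
+if (ie_core_get_versions(core, device_name, &ver) == OK)
+    ie_core_versions_free(&ver);
+
+// Load a custom CPU extension library (the -l option)
+ie_core_add_extension(core, "/path/to/libcustom_extension.so", "CPU");
+
+// Pass a kernels description .xml file to a non-CPU plugin (the -c option)
+ie_config_t cfg = {"CONFIG_FILE", "/path/to/kernels.xml", NULL};
+ie_core_set_config(core, &cfg, device_name);
+```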
+
+> **NOTE**: This sample uses `ie_network_reshape()` to set the batch size. While supported by SSD networks, reshape may not work with arbitrary topologies. See [Shape Inference Guide](../../../../../docs/IE_DG/ShapeInference.md) for more info.
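+
+A minimal sketch of how the batch dimension is set through the reshape API (field and parameter-passing details assumed per the C API headers; `network` and the number of input images come from the sample):
+
+```c
+// Fetch the current input shapes, overwrite the batch (N) dimension,
+// reshape the network and free the helper structure.
+input_shapes_t shapes;
+if (ie_network_get_input_shapes(network, &shapes) == OK) {
+    shapes.shapes[0].shape.dims[0] = image_num;  /* image_num = number of input images */
+    ie_network_reshape(network, shapes);
+    ie_network_input_shapes_free(&shapes);
+}
+```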
+
+| Options | Values |
+|:--- |:---
+| Validated Models | Person detection SSD (object detection network)
+| Model Format | Inference Engine Intermediate Representation (.xml + .bin), ONNX (.onnx)
+| Validated images | The sample uses OpenCV* to [read input image](https://docs.opencv.org/master/d4/da8/group__imgcodecs.html#ga288b8b3da0892bd651fce07b3bbd3a56) (.bmp, .png, .jpg)
+| Supported devices | [All](../../../../../docs/IE_DG/supported_plugins/Supported_Devices.md) |
+| Other language realization | [C++](../../../../samples/object_detection_sample_ssd/README.md), [Python](../../../python/sample/object_detection_sample_ssd/README.md) |
## How It Works
-Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference
-Engine device. When inference is done, the application creates output images and outputs data to the standard output stream.
+Upon the start-up the sample application reads command line parameters, loads the specified network and image(s) to the Inference
+Engine plugin. Then, the sample creates an inference request object. When inference is done, the application creates output image(s) and outputs data to the standard output stream.
+
+You can find an explicit description of
+each sample step in the [Integration Steps](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) section of the "Integrate the Inference Engine with Your Application" guide.
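+
+Unlike the Hello Classification sample, inference here is started asynchronously and then awaited, roughly like this (error handling simplified; `infer_request` is created the same way as in the other samples):
+
+```c
+// Start inference without blocking the calling thread ...
+IEStatusCode status = ie_infer_request_infer_async(infer_request);
+// ... then block until the request completes (-1 means wait indefinitely).
+status |= ie_infer_request_wait(infer_request, -1);
+if (status != OK) {
+    // handle the error
+}
+```
+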
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+## Building
-> **NOTE**: This sample uses `ie_network_reshape()` to set the batch size. While supported by SSD networks, reshape may not work with arbitrary topologies. See [Shape Inference Guide](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_ShapeInference.html) for more info.
+To build the sample, please use the instructions available in the [Build the Sample Applications](../../../../../docs/IE_DG/Samples_Overview.md) section of the Inference Engine Samples guide.
## Running
+To run the sample, you need to specify a model and an image:
+
+- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
+- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
+
Running the application with the -h option yields the following usage message:
```sh
@@ -28,39 +57,43 @@ object_detection_sample_ssd_c [OPTION]
Options:
-h Print a usage message.
- -i "" Required. Path to one or more .bmp images.
-m "" Required. Path to an .xml file with a trained model.
- -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
+ -i "" Required. Path to one or more images or folder with images.
+ -l "" Required for CPU plugin custom layers. Absolute path to a shared library with the kernels implementations.
Or
- -c "" Required for GPU custom kernels. Absolute path to the .xml file with the kernels descriptions.
- -d "" Optional. Specify the target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. Sample will look for a suitable plugin for device specified
+ -c "" Required for GPU, MYRIAD, HDDL custom kernels. Absolute path to the .xml config file
+ with the kernels descriptions.
+ -d "" Optional. Specify the target device to infer. Default value is CPU.
+ Use "-d HETERO:" format to specify HETERO plugin. Sample will look for a suitable plugin for device specified
-g Path to the configuration file. Default value: "config".
```
-Running the application with the empty list of options yields the usage message given above and an error message.
-
-To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+> **NOTES**:
>
-> The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
+> - By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+>
+> - Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+>
+> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
For example, to do inference on a CPU with the OpenVINO™ toolkit person detection SSD models, run one of the following commands:
+- with one image and [person-detection-retail-0013](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_retail_0013_description_person_detection_retail_0013.html) model
+
```sh
-./object_detection_sample_ssd_c -i /inputImage.bmp -m person-detection-retail-0013.xml -d CPU
+./object_detection_sample_ssd_c -i /inputImage.bmp -m /person-detection-retail-0013.xml -d CPU
```
-or
+- with several images and [person-detection-retail-0013](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_retail_0013_description_person_detection_retail_0013.html) model
```sh
-./object_detection_sample_ssd_c -i /inputImage1.bmp /inputImage2.bmp ... -m person-detection-retail-0013.xml -d CPU
+./object_detection_sample_ssd_c -i /inputImage1.bmp /inputImage2.bmp ... -m /person-detection-retail-0013.xml -d CPU
```
-or
+- with [person-detection-retail-0002](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_retail_0002_description_person_detection_retail_0002.html) model
```sh
-./object_detection_sample_ssd_c -i /inputImage.jpg -m person-detection-retail-0002.xml -d CPU
+./object_detection_sample_ssd_c -i -m /person-detection-retail-0002.xml -d CPU
```
## Sample Output
@@ -68,7 +101,59 @@ or
The application outputs several images (`out_0.bmp`, `out_1.bmp`, ... ) with detected objects enclosed in rectangles. It outputs the list of
classes of the detected objects along with the respective confidence values and the coordinates of the rectangles to the standard output stream.
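+
+Each printed line corresponds to one detection read from the output blob. Conceptually, the parsing looks like the sketch below, assuming the usual 7-element SSD DetectionOutput layout; `output_blob`, `max_proposal_count` and the image sizes are taken from the sample itself:
+
+```c
+ie_blob_buffer_t buffer;
+if (ie_blob_get_buffer(output_blob, &buffer) == OK) {
+    const float *detections = (const float *)buffer.buffer;
+    int i;
+    for (i = 0; i < max_proposal_count; ++i) {
+        const float *det = detections + i * 7;  /* [image_id, label, conf, xmin, ymin, xmax, ymax] */
+        if ((int)det[0] < 0)                    /* image_id of -1 marks the end of valid detections */
+            break;
+        printf("[%d, %d] element, prob = %f\n", i, (int)det[1], det[2]);
+        /* det[3..6] are normalized box corners, scaled by the image width and height before drawing */
+    }
+}
+```
+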
+```sh
+object_detection_sample_ssd_c -m person-detection-retail-0013.xml -i image_1.png image_2.jpg
+
+[ INFO ] InferenceEngine:
+
+[ INFO ] Parsing input parameters
+[ INFO ] Files were added: 2
+[ INFO ] image_1.png
+[ INFO ] image_2.jpg
+[ INFO ] Loading Inference Engine
+[ INFO ] Device info:
+ CPU
+ MKLDNNPlugin version .........
+ Build .........
+[ INFO ] Loading network:
+ person-detection-retail-0013.xml
+[ INFO ] Preparing input blobs
+[ WARNING ] Image is resized from (1699, 960) to (544, 320)
+[ WARNING ] Image is resized from (614, 346) to (544, 320)
+[ INFO ] Batch size is 2
+[ INFO ] Preparing output blobs
+[ INFO ] Loading model to the device
+[ INFO ] Create infer request
+[ INFO ] Start inference
+[ INFO ] Processing output blobs
+[0, 1] element, prob = 0.999090 (370, 201)-(634, 762) batch id : 0 WILL BE PRINTED!
+[1, 1] element, prob = 0.997386 (836, 192)-(999, 663) batch id : 0 WILL BE PRINTED!
+[2, 1] element, prob = 0.314753 (192, 2)-(265, 172) batch id : 0
+...
+[ INFO ] Image out_0.bmp created!
+[ INFO ] Image out_1.bmp created!
+[ INFO ] Execution successful
+
+This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
+```
## See Also
-* [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](@ref omz_tools_downloader_README)
+
+- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
+- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
+- [Model Downloader](@ref omz_tools_downloader_README)
+- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+
+[ie_infer_request_infer_async]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#gad2351010e292b6faec959a3d5a8fb60e
+[ie_infer_request_wait]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#ga0c05e63e63c8d9cdd92900e82b0137c9
+[ie_c_api_version]:https://docs.openvinotoolkit.org/latest/ie_c_api/ie__c__api_8h.html#a8fe3efe9cc606dcc7bec203102043e68
+[ie_core_get_versions]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#ga2932e188a690393f5d594572ac5d237b
+[ie_core_add_extension]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gadded2444ba81d2d396516b72c2478f8e
+[ie_core_set_config]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gaf09d1e77cc264067e4e22ddf99f21ec1
+[ie_network_get_inputs_number]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga6a3349bca66c4ba8b41a434061fccf52
+[ie_network_get_input_dims]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#gac621a654b89d413041cbc2288627f6a5
+[ie_network_get_input_shapes]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga5409734f25ffbb1379e876217c0bc6f3
+[ie_network_get_outputs_number]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga869b8c309797f1e09f73ddffd1b57509
+[ie_network_get_output_dims]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga8de7bf2f626f19eba08a2f043fc1b5d2
+[ie_network_reshape]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#gac4f690afd0c2221f7db2ff9be4aa0637
+[ie_blob_get_buffer]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Blob.html#ga948e0186cea6a393c113d5c399cfcb4c
diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/c_w_dirent.h b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/c_w_dirent.h
index 56c7005b2ea937..c6dc9ee442de0f 100644
--- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/c_w_dirent.h
+++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/c_w_dirent.h
@@ -42,10 +42,18 @@
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+/// @brief structure to store directory names
typedef struct dirent {
char *d_name;
}dirent;
+/**
+* @brief Create a dirent entry from a wide-character file path
+* @param wsFilePath wide-character file path
+* @return pointer to the created dirent struct
+*/
static dirent *createDirent(const wchar_t *wsFilePath) {
dirent *d = (dirent *)malloc(sizeof(dirent));
size_t i;
@@ -55,6 +63,11 @@ static dirent *createDirent(const wchar_t *wsFilePath) {
return d;
}
+/**
+* @brief Free a dirent struct
+* @param d pointer to the dirent struct to free
+* @return none
+*/
static void freeDirent(dirent **d) {
free((*d)->d_name);
(*d)->d_name = NULL;
@@ -62,12 +75,19 @@ static void freeDirent(dirent **d) {
*d = NULL;
}
+/// @brief structure to store directory data (files meta)
typedef struct DIR {
WIN32_FIND_DATAA FindFileData;
HANDLE hFind;
dirent *next;
}DIR;
+/**
+* @brief Check whether a string ends with the given suffix
+* @param src string to check
+* @param with suffix to look for
+* @return status 1(success) or 0(fail)
+*/
static int endsWith(const char *src, const char *with) {
int wl = (int)(strlen(with));
int so = (int)(strlen(with)) - wl;
@@ -77,6 +97,12 @@ static int endsWith(const char *src, const char *with) {
else
return 0;
}
+
+/**
+* @brief Check that the directory's file handle is valid
+* @param dp pointer to the directory data struct
+* @return status 1(success) or 0(fail)
+*/
static int isValid(DIR* dp) {
if (dp->hFind != INVALID_HANDLE_VALUE && dp->FindFileData.dwReserved0) {
return 1;
@@ -84,6 +110,12 @@ static int isValid(DIR* dp) {
return 0;
}
}
+
+/**
+* @brief Open a directory and create a directory data struct for it
+* @param dirPath path to the directory
+* @return pointer to the created directory data struct
+*/
static DIR *opendir(const char *dirPath) {
DIR *dp = (DIR *)malloc(sizeof(DIR));
dp->next = NULL;
@@ -103,6 +135,11 @@ static DIR *opendir(const char *dirPath) {
return dp;
}
+/**
+* @brief Walk through the directory entries
+* @param dp pointer to the directory data struct
+* @return pointer to the next directory entry
+*/
static struct dirent *readdir(DIR *dp) {
if (dp->next != NULL) freeDirent(&(dp->next));
@@ -117,6 +154,11 @@ static struct dirent *readdir(DIR *dp) {
return dp->next;
}
+/**
+* @brief Close the directory and free the directory data struct
+* @param dp pointer to the directory data struct
+* @return none
+*/
static void closedir(DIR *dp){
if (dp->next) {
freeDirent(&(dp->next));
diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/main.c b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/main.c
index 33099ac95e8e27..a482632399f481 100644
--- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/main.c
+++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/main.c
@@ -6,6 +6,7 @@
#include
#include
#include
+
#include
#include "object_detection_sample_ssd.h"
#include
@@ -21,8 +22,8 @@
static const char *img_msg = NULL;
static const char *input_model = NULL;
static const char *device_name = "CPU";
-static const char *custom_cldnn_msg = NULL;
-static const char *custom_cpu_library_msg = NULL;
+static const char *custom_plugin_cfg_msg = NULL;
+static const char *custom_ex_library_msg = NULL;
static const char *config_msg = NULL;
static int file_num = 0;
static char **file_paths = NULL;
@@ -30,6 +31,12 @@ static char **file_paths = NULL;
const char *info = "[ INFO ] ";
const char *warn = "[ WARNING ] ";
+/**
+* @brief Parse and check command line arguments
+* @param int argc - count of args
+* @param char *argv[] - array values of args
+* @return int - status 1(success) or -1(fail)
+*/
int ParseAndCheckCommandLine(int argc, char *argv[]) {
int opt = 0;
int help = 0;
@@ -53,12 +60,12 @@ int ParseAndCheckCommandLine(int argc, char *argv[]) {
device_name = optarg;
break;
case 'c':
- custom_cldnn_msg = optarg;
+ custom_plugin_cfg_msg = optarg;
break;
case 'l':
- custom_cpu_library_msg = optarg;
+ custom_ex_library_msg = optarg;
break;
- case 'f':
+ case 'g':
config_msg = optarg;
break;
default:
@@ -69,11 +76,11 @@ int ParseAndCheckCommandLine(int argc, char *argv[]) {
if (help)
return -1;
if (input_model == NULL) {
- printf("Model is required but not set. Please set -m option. \n");
+ printf("Model is required but not set. Please set -m option.\n");
return -1;
}
if (img_msg == NULL) {
- printf("Input is required but not set.Please set - i option.\n");
+ printf("Input is required but not set.Please set -i option.\n");
return -1;
}
@@ -138,15 +145,6 @@ void readInputFilesArgument(const char *arg) {
}
file_paths[file_num++] = file_path;
}
-
- if (file_num) {
- printf("%sFiles were added: %d\n", info, file_num);
- for (i = 0; i < file_num; ++i) {
- printf("%s %s\n", info, file_paths[i]);
- }
- } else {
- printf("%sFiles were added: %d. Too many to display each of them.\n", info, file_num);
- }
}
/**
@@ -168,10 +166,19 @@ void parseInputFilesArguments(int argc, char **argv) {
}
readInputFilesArgument(argv[i]);
}
+
+ if (file_num) {
+ printf("%sFiles were added: %d\n", info, file_num);
+ for (i = 0; i < file_num; ++i) {
+ printf("%s %s\n", info, file_paths[i]);
+ }
+ } else {
+ printf("%sFiles were added: %d. Too many to display each of them.\n", info, file_num);
+ }
}
/**
-* @brief Convert the contents of configuration file to the ie_config_t type.
+* @brief Convert the contents of the configuration file to the ie_config_t struct.
* @param config_file File path.
* @param comment Separator symbol.
* @return A pointer to the ie_config_t instance.
@@ -274,11 +281,14 @@ void int2str(char *str, int num) {
int main(int argc, char **argv) {
/** This sample covers certain topology and cannot be generalized for any object detection one **/
+ // ------------------------------ Get Inference Engine API version ---------------------------------
ie_version_t version = ie_c_api_version();
printf("%sInferenceEngine: \n", info);
printf("%s\n", version.api_version);
ie_version_free(&version);
+ // ------------------------------ Parsing and validation of input args ---------------------------------
+
char **argv_temp =(char **)calloc(argc, sizeof(char *));
if (!argv_temp) {
return EXIT_FAILURE;
@@ -296,14 +306,13 @@ int main(int argc, char **argv) {
ie_infer_request_t *infer_request = NULL;
ie_blob_t *imageInput = NULL, *output_blob = NULL;
- // --------------------------- 1. Parsing and validation of input args ---------------------------------
if (ParseAndCheckCommandLine(argc, argv) < 0) {
free(argv_temp);
return EXIT_FAILURE;
}
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 2. Read input -----------------------------------------------------------
+ // --------------------------- Read input -----------------------------------------------------------
/** This file_paths stores paths to the processed images **/
parseInputFilesArguments(argc, argv_temp);
if (!file_num) {
@@ -313,12 +322,14 @@ int main(int argc, char **argv) {
}
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 3. Load inference engine ------------------------------------------------
+ // --------------------------- Step 1. Initialize inference engine core -------------------------------------
+
printf("%sLoading Inference Engine\n", info);
IEStatusCode status = ie_core_create("", &core);
if (status != OK)
goto err;
+ // ------------------------------ Get Available Devices ------------------------------------------------------
ie_core_versions_t ver;
printf("%sDevice info: \n", info);
status = ie_core_get_versions(core, device_name, &ver);
@@ -331,25 +342,25 @@ int main(int argc, char **argv) {
}
ie_core_versions_free(&ver);
- if (custom_cpu_library_msg) {
- // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension
- status = ie_core_add_extension(core, custom_cpu_library_msg, "CPU");
+ if (custom_ex_library_msg) {
+ // Custom CPU extension is loaded as a shared library and passed as a pointer to base extension
+ status = ie_core_add_extension(core, custom_ex_library_msg, "CPU");
if (status != OK)
goto err;
- printf("%sCPU Extension loaded: %s\n", info, custom_cpu_library_msg);
+ printf("%sCustom extension loaded: %s\n", info, custom_ex_library_msg);
}
- if (custom_cldnn_msg) {
- // clDNN Extensions are loaded from an .xml description and OpenCL kernel files
- ie_config_t cfg = {"CONFIG_FILE", custom_cldnn_msg, NULL};
- status = ie_core_set_config(core, &cfg, "GPU");
+ // Compare device names by content with strcmp, not by pointer equality
+ if (custom_plugin_cfg_msg && (strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
+ // Config for device plugin custom extension is loaded from an .xml description
+ ie_config_t cfg = {"CONFIG_FILE", custom_plugin_cfg_msg, NULL};
+ status = ie_core_set_config(core, &cfg, device_name);
if (status != OK)
goto err;
- printf("%sGPU Extension loaded: %s\n", info, custom_cldnn_msg);
+ printf("%sConfig for device plugin custom extension loaded: %s\n", info, custom_plugin_cfg_msg);
}
// -----------------------------------------------------------------------------------------------------
- // 4. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
+ // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
printf("%sLoading network:\n", info);
printf("\t%s\n", input_model);
status = ie_core_read_network(core, input_model, NULL, &network);
@@ -357,7 +368,8 @@ int main(int argc, char **argv) {
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 5. Prepare input blobs --------------------------------------------------
+ // --------------------------- Step 3. Configure input & output ---------------------------------------------
+ // --------------------------- Prepare input blobs -----------------------------------------------------
printf("%sPreparing input blobs\n", info);
/** SSD network has one input and one output **/
@@ -494,9 +506,8 @@ int main(int argc, char **argv) {
size_t batchSize = shapes2.shapes[0].shape.dims[0];
ie_network_input_shapes_free(&shapes2);
printf("%sBatch size is %zu\n", info, batchSize);
- // -----------------------------------------------------------------------------------------------------
- // --------------------------- 6. Prepare output blobs -------------------------------------------------
+ // --------------------------- Prepare output blobs ----------------------------------------------------
printf("%sPreparing output blobs\n", info);
size_t output_num = 0;
@@ -534,7 +545,7 @@ int main(int argc, char **argv) {
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 7. Loading model to the device ------------------------------------------
+ // --------------------------- Step 4. Loading model to the device ------------------------------------------
printf("%sLoading model to the device\n", info);
if (config_msg) {
ie_config_t * config = parseConfig(config_msg, '#');
@@ -552,15 +563,14 @@ int main(int argc, char **argv) {
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 8. Create infer request -------------------------------------------------
+ // --------------------------- Step 5. Create infer request -------------------------------------------------
printf("%sCreate infer request\n", info);
status = ie_exec_network_create_infer_request(exe_network, &infer_request);
if (status != OK)
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 9. Prepare input --------------------------------------------------------
-
+ // --------------------------- Step 6. Prepare input --------------------------------------------------------
/** Creating input blob **/
status = ie_infer_request_get_blob(infer_request, imageInputName, &imageInput);
@@ -624,7 +634,7 @@ int main(int argc, char **argv) {
}
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 10. Do inference ---------------------------------------------------------
+ // --------------------------- Step 7. Do inference --------------------------------------------------------
printf("%sStart inference\n", info);
status = ie_infer_request_infer_async(infer_request);
status |= ie_infer_request_wait(infer_request, -1);
@@ -632,7 +642,7 @@ int main(int argc, char **argv) {
goto err;
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 11. Process output -------------------------------------------------------
+ // --------------------------- Step 8. Process output ------------------------------------------------------
printf("%sProcessing output blobs\n", info);
status = ie_infer_request_get_blob(infer_request, output_name, &output_blob);
@@ -706,6 +716,7 @@ int main(int argc, char **argv) {
// -----------------------------------------------------------------------------------------------------
printf("%sExecution successful\n", info);
+ printf("\nThis sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n");
for (i = 0; i < image_num; ++i) {
free(classes[i]);
diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/object_detection_sample_ssd.h b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
index c097aaf968b8cb..e8e1c126e797e8 100644
--- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
+++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
@@ -13,19 +13,19 @@ static const char *help_message = "Print a usage message.";
static const char* model_message = "Required. Path to an .xml file with a trained model.";
/// @brief message for images argument
-static const char *image_message = "Required. Path to one or more .bmp images.";
+static const char *image_message = "Required. Path to one or more images or a folder with images.";
/// @brief message for assigning cnn calculation to device
-static const char *target_device_message = "Optional. Specify the target device to infer on (the list of available devices is shown below). " \
+static const char *target_device_message = "Optional. Specify the target device to infer on. " \
"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " \
-"Sample will look for a suitable plugin for device specified";
+"Sample will look for a suitable plugin for device specified.";
-/// @brief message for clDNN custom kernels desc
-static const char *custom_cldnn_message = "Required for GPU custom kernels. "\
-"Absolute path to the .xml file with the kernels descriptions.";
+/// @brief message for plugin custom kernels desc
+static const char *custom_plugin_config_message = "Required for GPU, MYRIAD, HDDL custom kernels. "\
+"Absolute path to the .xml config file with the kernels descriptions.";
-/// @brief message for user library argument
-static const char *custom_cpu_library_message = "Required for CPU custom layers. " \
+/// @brief message for user extension library argument
+static const char *custom_ex_library_message = "Required for CPU plugin custom layers. " \
"Absolute path to a shared library with the kernels implementations.";
/// @brief message for config argument
@@ -34,14 +34,14 @@ static const char *config_message = "Path to the configuration file. Default val
* \brief This function show a help message
*/
static void showUsage() {
- printf("\nobject_detection_sample_ssd [OPTION]\n");
+ printf("\nobject_detection_sample_ssd_c [OPTION]\n");
printf("Options:\n\n");
printf(" -h %s\n", help_message);
printf(" -m \"\" %s\n", model_message);
printf(" -i \"\" %s\n", image_message);
- printf(" -l \"\" %s\n", custom_cpu_library_message);
+ printf(" -l \"\" %s\n", custom_ex_library_message);
printf(" Or\n");
- printf(" -c \"\" %s\n", custom_cldnn_message);
+ printf(" -c \"\" %s\n", custom_plugin_config_message);
printf(" -d \"\" %s\n", target_device_message);
printf(" -g %s\n", config_message);
}
@@ -58,6 +58,13 @@ char *optarg;
fputc(c, stderr);\
fputs("\'\n", stderr);}
+/**
+* @brief Parse command line arguments against the available options
+* @param int argc - count of args
+* @param char *argv[] - array values of args
+* @param char *opts - string of accepted option characters
+* @return the option character or -1(fail)
+*/
static int getopt(int argc, char **argv, char *opts) {
static int sp = 1;
register int c = 0;
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
index 29cb1acfd61fe4..0faab4baccd721 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
@@ -23,6 +23,7 @@
#include
#include
+#include
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::nanoseconds ns;
diff --git a/inference-engine/ie_bridges/python/tests/test_Blob.py b/inference-engine/ie_bridges/python/tests/test_Blob.py
index 91c16102b5046b..1f0d00519ab02c 100644
--- a/inference-engine/ie_bridges/python/tests/test_Blob.py
+++ b/inference-engine/ie_bridges/python/tests/test_Blob.py
@@ -39,7 +39,22 @@ def test_get_buffer():
blob = Blob(tensor_desc, array)
assert np.array_equal(blob.buffer, array)
-def write_to_buffer(precision, numpy_precision):
+
+@pytest.mark.parametrize("precision, numpy_precision", [
+ ("FP32", np.float32),
+ ("FP64", np.float64),
+ ("FP16", np.float16),
+ ("I8", np.int8),
+ ("U8", np.uint8),
+ ("I32", np.int32),
+ ("I16", np.int16),
+ ("U16", np.uint16),
+ ("I64", np.int64),
+ ("BOOL", np.uint8),
+ ("BIN", np.int8),
+ ("BF16", np.float16),
+])
+def test_write_to_buffer(precision, numpy_precision):
tensor_desc = TensorDesc(precision, [1, 3, 127, 127], "NCHW")
array = np.zeros(shape=(1, 3, 127, 127), dtype=numpy_precision)
blob = Blob(tensor_desc, array)
@@ -47,53 +62,6 @@ def write_to_buffer(precision, numpy_precision):
blob.buffer[:] = ones_arr
assert np.array_equal(blob.buffer, ones_arr)
-def test_write_to_buffer_fp32():
- write_to_buffer("FP32", np.float32)
-
-
-def test_write_to_buffer_fp64():
- write_to_buffer("FP64", np.float64)
-
-
-def test_write_to_buffer_fp16():
- write_to_buffer("FP16", np.float16)
-
-
-def test_write_to_buffer_int8():
- write_to_buffer("I8", np.int8)
-
-
-def test_write_to_buffer_uint8():
- write_to_buffer("U8", np.uint8)
-
-
-def test_write_to_buffer_int32():
- write_to_buffer("I32", np.int32)
-
-
-def test_write_to_buffer_int16():
- write_to_buffer("I16", np.int16)
-
-
-def test_write_to_buffer_uint16():
- write_to_buffer("U16", np.uint16)
-
-
-def test_write_to_buffer_int64():
- write_to_buffer("I64", np.int64)
-
-
-def test_write_to_buffer_bool():
- write_to_buffer("BOOL", np.uint8)
-
-
-def test_write_to_buffer_bin():
- write_to_buffer("BIN", np.int8)
-
-
-def test_write_to_buffer_bf16():
- write_to_buffer("BF16", np.float16)
-
def test_write_numpy_scalar_int64():
tensor_desc = TensorDesc("I64", [], "SCALAR")
diff --git a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py
index d722608696cd82..2193a6501c24bb 100644
--- a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py
+++ b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py
@@ -234,10 +234,11 @@ def test_plugin_accessible_after_deletion(device):
del ie_core
-@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") == "ARM",
- reason=f"Cannot run test on device {os.environ.get('TEST_DEVICE')}")
def test_exec_graph(device):
ie_core = ie.IECore()
+ if device == "CPU":
+ if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to get_exec_graph_info method isn't implemented")
net = ie_core.read_network(model=test_net_xml, weights=test_net_bin)
exec_net = ie_core.load_network(net, device)
img = read_image()
@@ -294,9 +295,11 @@ def test_get_metric(device):
assert network_name == "test_model"
-@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device independent test")
+@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device dependent test")
def test_get_config(device):
ie_core = ie.IECore()
+ if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to CPU dependent test")
net = ie_core.read_network(model=test_net_xml, weights=test_net_bin)
exec_net = ie_core.load_network(net, device)
config = exec_net.get_config("PERF_COUNT")
diff --git a/inference-engine/ie_bridges/python/tests/test_IECore.py b/inference-engine/ie_bridges/python/tests/test_IECore.py
index 2684def8b41752..41d28f1c41b1f6 100644
--- a/inference-engine/ie_bridges/python/tests/test_IECore.py
+++ b/inference-engine/ie_bridges/python/tests/test_IECore.py
@@ -4,7 +4,6 @@
import os
import pytest
from sys import platform
-import numpy as np
from pathlib import Path
from openvino.inference_engine import IENetwork, IECore, ExecutableNetwork
@@ -61,8 +60,11 @@ def test_load_network_wrong_device():
def test_query_network(device):
- import ngraph as ng
ie = IECore()
+ if device == "CPU":
+ if ie.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to ngraph")
+ import ngraph as ng
net = ie.read_network(model=test_net_xml, weights=test_net_bin)
query_res = ie.query_network(net, device)
func_net = ng.function_from_cnn(net)
@@ -73,18 +75,22 @@ def test_query_network(device):
assert next(iter(set(query_res.values()))) == device, "Wrong device for some layers"
-@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device independent test")
+@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device dependent test")
def test_register_plugin():
ie = IECore()
+ if ie.get_metric("CPU", "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to MKLDNNPlugin specific test")
ie.register_plugin("MKLDNNPlugin", "BLA")
net = ie.read_network(model=test_net_xml, weights=test_net_bin)
exec_net = ie.load_network(net, "BLA")
assert isinstance(exec_net, ExecutableNetwork), "Cannot load the network to the registered plugin with name 'BLA'"
-@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device independent test")
+@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device dependent test")
def test_register_plugins():
ie = IECore()
+ if ie.get_metric("CPU", "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to MKLDNNPlugin specific test")
if platform == "linux" or platform == "linux2":
ie.register_plugins(plugins_xml)
elif platform == "darwin":
@@ -126,11 +132,12 @@ def test_get_metric_list_of_str():
"metric are strings!"
-
@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU",
reason=f"Cannot run test on device {os.environ.get('TEST_DEVICE')}, Plugin specific test")
def test_get_metric_tuple_of_two_ints():
ie = IECore()
+ if ie.get_metric("CPU", "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to unsupported device metric")
param = ie.get_metric("CPU", "RANGE_FOR_STREAMS")
assert isinstance(param, tuple), "Parameter value for 'RANGE_FOR_STREAMS' " \
f"metric must be tuple but {type(param)} is returned"
@@ -142,6 +149,8 @@ def test_get_metric_tuple_of_two_ints():
reason=f"Cannot run test on device {os.environ.get('TEST_DEVICE')}, Plugin specific test")
def test_get_metric_tuple_of_three_ints():
ie = IECore()
+ if ie.get_metric("CPU", "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to unsupported device metric")
param = ie.get_metric("CPU", "RANGE_FOR_ASYNC_INFER_REQUESTS")
assert isinstance(param, tuple), "Parameter value for 'RANGE_FOR_ASYNC_INFER_REQUESTS' " \
f"metric must be tuple but {type(param)} is returned"
@@ -185,21 +194,25 @@ def test_read_network_from_onnx():
net = ie.read_network(model=test_net_onnx)
assert isinstance(net, IENetwork)
+
def test_read_network_from_onnx_as_path():
ie = IECore()
net = ie.read_network(model=Path(test_net_onnx))
assert isinstance(net, IENetwork)
+
def test_read_network_from_prototxt():
ie = IECore()
net = ie.read_network(model=test_net_prototxt)
assert isinstance(net, IENetwork)
+
def test_read_network_from_prototxt_as_path():
ie = IECore()
net = ie.read_network(model=Path(test_net_prototxt))
assert isinstance(net, IENetwork)
+
def test_incorrect_xml():
ie = IECore()
with pytest.raises(Exception) as e:
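The ARM-plugin guard added above is repeated verbatim across several tests in this patch. A minimal helper sketch (hypothetical, not part of the patch) that the test modules could share, assuming the same `FULL_DEVICE_NAME` metric check:

```python
import pytest
from openvino.inference_engine import IECore


def skip_if_arm_plugin(ie: IECore, device: str = "CPU"):
    """Skip the current test when the CPU device is backed by the ARM plugin."""
    # The ARM plugin reports "arm_compute::NEON" as its full device name.
    if device == "CPU" and ie.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
        pytest.skip("Can't run on ARM plugin")
```

Each test could then call `skip_if_arm_plugin(ie, device)` right after creating its `IECore` instance.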
diff --git a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py
index 60e17268ede4e3..1c3474e689134d 100644
--- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py
+++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py
@@ -4,7 +4,6 @@
import os
import pytest
import warnings
-import numpy as np
from openvino.inference_engine import IECore, IENetwork, DataPtr, InputInfoPtr, PreProcessInfo
from conftest import model_path
@@ -183,9 +182,12 @@ def test_batch_size_after_reshape():
assert net.input_info['data'].input_data.shape == [8, 3, 32, 32]
-def test_serialize():
- import ngraph as ng
+def test_serialize(device):
ie = IECore()
+ if device == "CPU":
+ if ie.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to ngraph")
+ import ngraph as ng
net = ie.read_network(model=test_net_xml, weights=test_net_bin)
net.serialize("./serialized_net.xml", "./serialized_net.bin")
serialized_net = ie.read_network(model="./serialized_net.xml", weights="./serialized_net.bin")
diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
index b2783f5cad9c10..5a534703598fc3 100644
--- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py
+++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
@@ -376,6 +376,9 @@ def execute(self, input_data):
def test_get_perf_counts(device):
ie_core = ie.IECore()
+ if device == "CPU":
+ if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to ngraph")
net = ie_core.read_network(test_net_xml, test_net_bin)
ie_core.set_config({"PERF_COUNT": "YES"}, device)
exec_net = ie_core.load_network(net, device)
@@ -395,6 +398,8 @@ def test_get_perf_counts(device):
"Dynamic batch fully supported only on CPU")
def test_set_batch_size(device):
ie_core = ie.IECore()
+ if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin due-to dynamic batch isn't supported")
ie_core.set_config({"DYN_BATCH_ENABLED": "YES"}, device)
net = ie_core.read_network(test_net_xml, test_net_bin)
net.batch_size = 10
@@ -438,6 +443,9 @@ def test_set_negative_batch_size(device):
def test_blob_setter(device):
ie_core = ie.IECore()
+ if device == "CPU":
+ if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
+ pytest.skip("Can't run on ARM plugin")
net = ie_core.read_network(test_net_xml, test_net_bin)
exec_net_1 = ie_core.load_network(network=net, device_name=device, num_requests=1)
diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
index 9fbd32370e952c..487eac4ce12803 100644
--- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
+++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
@@ -1,9 +1,9 @@
defusedxml>=0.5.0
scipy==1.5.4
jstyleson==0.0.2
-numpy==1.16.6
+numpy~=1.18.5
addict==2.2.1
-pandas==0.24.2
+pandas~=1.1.5
hyperopt==0.1.2
networkx==2.2
tqdm==4.31.1
@@ -11,17 +11,17 @@ texttable==1.6.3
py-cpuinfo!=5.0,!=6.0
PyYAML>=5.4.1
pillow>=8.1.0
-scikit-image
-scikit-learn
-yamlloader
-shapely
-nibabel
-pydicom
-sentencepiece
-tokenizers
-editdistance
-parasail
-fast-ctc-decode
-rawpy
-nltk
-opencv-python
+scikit-image>=0.17
+scikit-learn>=0.23
+yamlloader>=0.5
+shapely>=1.7
+nibabel>=3.1
+pydicom>=2.0
+sentencepiece>=0.1.91
+tokenizers>=0.8
+editdistance>=0.5
+parasail>=1.2
+fast-ctc-decode>=0.2
+rawpy>=0.15
+nltk>=3.5
+opencv-python>=4.4
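The dependency pins above move from exact versions (`==`) to compatible-release specifiers (`~=`), which allow patch-level updates within the same minor version. A small illustration (not part of the patch) using the `packaging` library of what `numpy~=1.18.5` accepts:

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=1.18.5")  # compatible release: >=1.18.5 and ==1.18.*
print("1.18.9" in spec)          # True  - patch updates stay allowed
print("1.19.0" in spec)          # False - minor version bumps are excluded
```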
diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
index 9b9012f08bb6c3..77c423416baa0c 100644
--- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
+++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
@@ -1,25 +1,27 @@
[options]
-py_modules =
- mo
- mo_tf
- mo_caffe
- mo_mxnet
- mo_onnx
- mo_kaldi
+py_modules =
+ mo
+ mo_tf
+ mo_caffe
+ mo_mxnet
+ mo_onnx
+ mo_kaldi
[options.package_data]
- mo = *.txt
+ mo = *.txt
+ compression.configs.hardware = *.json
[options.entry_points]
console_scripts =
+ mo=mo.__main__:main
pot=app.run:main
accuracy_check=accuracy_checker.main:main
convert_annotation=accuracy_checker.annotation_converters.convert:main
[metadata]
license_files =
- readme*
- *LICENSE*
- *license*
- *third-party-programs*
- *EULA*
+ readme*
+ *LICENSE*
+ *license*
+ *third-party-programs*
+ *EULA*
diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/cldnn/cldnn_config.hpp
index 111eff4104f913..faf198517fc9d4 100644
--- a/inference-engine/include/cldnn/cldnn_config.hpp
+++ b/inference-engine/include/cldnn/cldnn_config.hpp
@@ -72,6 +72,11 @@ DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS);
*/
DECLARE_CLDNN_CONFIG_KEY(NV12_TWO_INPUTS);
+/**
+* @brief This key sets the maximum number of host threads that the GPU plugin can use during model loading.
+* The default value is the maximum number of threads available in the environment.
+*/
+DECLARE_CLDNN_CONFIG_KEY(MAX_NUM_THREADS);
} // namespace CLDNNConfigParams
} // namespace InferenceEngine
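A minimal sketch of how the new key could be used from the Python API, assuming `DECLARE_CLDNN_CONFIG_KEY(MAX_NUM_THREADS)` expands to the string `"CLDNN_MAX_NUM_THREADS"` in line with the other CLDNN keys:

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # hypothetical model files
# Cap the number of host threads the GPU plugin may use while compiling the model
exec_net = ie.load_network(net, "GPU", config={"CLDNN_MAX_NUM_THREADS": "4"})
```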
diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/cpp/ie_executable_network.hpp
index 9716d40bacf2d9..94b84f66b528ea 100644
--- a/inference-engine/include/cpp/ie_executable_network.hpp
+++ b/inference-engine/include/cpp/ie_executable_network.hpp
@@ -16,18 +16,26 @@
#include "cpp/ie_cnn_network.h"
#include "cpp/ie_infer_request.hpp"
-#include "cpp/ie_memory_state.hpp"
-#include "ie_iexecutable_network.hpp"
-#include "details/ie_so_loader.h"
namespace InferenceEngine {
+namespace details {
+class SharedObjectLoader;
+}
+
+class IExecutableNetworkInternal;
+class IExecutableNetwork;
/**
* @brief This is an interface of an executable network
*/
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
- IExecutableNetwork::Ptr actual;
- details::SharedObjectLoader::Ptr plg;
+ std::shared_ptr<IExecutableNetworkInternal> _impl;
+ std::shared_ptr<details::SharedObjectLoader> _so;
+
+ explicit ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl,
+ const std::shared_ptr<details::SharedObjectLoader>& so);
+
+ friend class InferencePlugin;
public:
/**
@@ -40,14 +48,6 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
*/
~ExecutableNetwork();
- /**
- * @brief Constructs ExecutableNetwork from the initialized shared_pointer
- *
- * @param actual Initialized shared pointer
- * @param plg Plugin to use
- */
- explicit ExecutableNetwork(IExecutableNetwork::Ptr actual, details::SharedObjectLoader::Ptr plg = {});
-
/**
* @brief Gets the Executable network output Data node information.
*
@@ -74,10 +74,11 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
/**
* @brief reset owned object to new pointer.
*
- * Eessential for cases when simultaneously loaded networks not expected.
+ * Essential for cases when simultaneously loaded networks not expected.
* @param newActual actual pointed object
*/
- void reset(IExecutableNetwork::Ptr newActual);
+ INFERENCE_ENGINE_DEPRECATED("Will be removed")
+ void reset(std::shared_ptr<IExecutableNetwork> newActual);
/**
* @brief Creates an inference request object used to infer the network.
@@ -94,6 +95,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
* Wraps IExecutableNetwork::CreateInferRequest.
* @return shared pointer on InferenceEngine::InferRequest object
*/
+ INFERENCE_ENGINE_DEPRECATED("Use CreateInferRequest")
InferRequest::Ptr CreateInferRequestPtr();
/**
@@ -118,7 +120,8 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
* @brief cast operator is used when this wrapper initialized by LoadNetwork
* @return A shared pointer to IExecutableNetwork interface.
*/
- operator IExecutableNetwork::Ptr&();
+ INFERENCE_ENGINE_DEPRECATED("Will be removed")
+ operator std::shared_ptr<IExecutableNetwork>();
/**
* @copybrief IExecutableNetwork::GetExecGraphInfo
@@ -151,7 +154,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
* The method is responsible to extract information
* which affects executable network execution. The list of supported configuration values can be extracted via
* ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed
- * dymanically, e.g. DEVICE_ID cannot changed if an executable network has already been compiled for particular
+ * dynamically, e.g. DEVICE_ID cannot be changed if an executable network has already been compiled for a particular
* device.
*
* @param name config key, can be found in ie_plugin_config.hpp
@@ -178,9 +181,15 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
RemoteContext::Ptr GetContext() const;
/**
- * @brief A smart pointer to the ExecutableNetwork object
+ * @brief Checks if the current ExecutableNetwork object is not initialized
+ * @return true if the current ExecutableNetwork object is not initialized, false otherwise
+ */
+ bool operator!() const noexcept;
+ /**
+ * @brief Checks if the current ExecutableNetwork object is initialized
+ * @return true if the current ExecutableNetwork object is initialized, false otherwise
*/
- using Ptr = std::shared_ptr<ExecutableNetwork>;
+ explicit operator bool() const noexcept;
};
} // namespace InferenceEngine
diff --git a/inference-engine/include/details/ie_so_loader.h b/inference-engine/include/details/ie_so_loader.h
index 6ba85360271ff1..aa1a82a2281a6d 100644
--- a/inference-engine/include/details/ie_so_loader.h
+++ b/inference-engine/include/details/ie_so_loader.h
@@ -46,7 +46,7 @@ class INFERENCE_ENGINE_API_CLASS(SharedObjectLoader) {
/**
* @brief A destructor
*/
- ~SharedObjectLoader() noexcept(false);
+ ~SharedObjectLoader();
/**
* @brief Searches for a function symbol in the loaded module
diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/include/details/ie_so_pointer.hpp
index 3342d03af74b8c..b2926ee44ecae7 100644
--- a/inference-engine/include/details/ie_so_pointer.hpp
+++ b/inference-engine/include/details/ie_so_pointer.hpp
@@ -123,7 +123,7 @@ IE_SUPPRESS_DEPRECATED_END
}
explicit operator bool() const noexcept {
- return (nullptr != _so_loader) && (nullptr != _pointedObj);
+ return (nullptr != _pointedObj);
}
friend bool operator==(std::nullptr_t, const SOPointer& ptr) noexcept {
@@ -145,7 +145,7 @@ IE_SUPPRESS_DEPRECATED_END
return *this;
}
- operator std::shared_ptr() const noexcept {
+ operator const std::shared_ptr&() const noexcept {
return _so_loader;
}
diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp
index 8b91de3b066e85..7273f5a0332ae0 100644
--- a/inference-engine/include/gna/gna_config.hpp
+++ b/inference-engine/include/gna/gna_config.hpp
@@ -43,12 +43,11 @@ namespace GNAConfigParams {
DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
/**
-* @brief By default gna api work in Int16 precision, however this can be adjusted if necessary,
+* @brief By default, the GNA API works with Int16 weight precision, however this can be adjusted if necessary,
* currently supported values are I16, I8
*/
DECLARE_GNA_CONFIG_KEY(PRECISION);
-
/**
* @brief if turned on, dump GNA firmware model into specified file
*/
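A minimal sketch of selecting the lower weight precision from the Python API, assuming the key expands to the string `"GNA_PRECISION"` following the usual `GNA_` prefix convention:

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # hypothetical model files
# Request I8 weights instead of the default I16
exec_net = ie.load_network(net, "GNA", config={"GNA_PRECISION": "I8"})
```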
diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie_common.h
index 263579d62dd6db..d14f26e70e692a 100644
--- a/inference-engine/include/ie_common.h
+++ b/inference-engine/include/ie_common.h
@@ -281,6 +281,11 @@ struct QueryNetworkResult {
ResponseDesc resp;
};
+/**
+ * @brief A collection that contains string as key, and const Data smart pointer as value
+ */
+using ConstOutputsDataMap = std::map<std::string, CDataPtr>;
+
namespace details {
struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception")
INFERENCE_ENGINE_API_CLASS(InferenceEngineException) : public std::runtime_error {
diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie_iexecutable_network.hpp
index 04bbd2df052905..16c1e9d971e284 100644
--- a/inference-engine/include/ie_iexecutable_network.hpp
+++ b/inference-engine/include/ie_iexecutable_network.hpp
@@ -23,16 +23,11 @@
#include "ie_remote_context.hpp"
namespace InferenceEngine {
-
-/**
- * @brief A collection that contains string as key, and const Data smart pointer as value
- */
-using ConstOutputsDataMap = std::map<std::string, CDataPtr>;
-
/**
* @brief This is an interface of an executable network
*/
-class IExecutableNetwork : public std::enable_shared_from_this<IExecutableNetwork> {
+class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork instead") IExecutableNetwork
+ : public std::enable_shared_from_this<IExecutableNetwork> {
public:
/**
* @brief A smart pointer to the current IExecutableNetwork object
diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md
index 084edf45a046f1..d3aa8b5e489134 100644
--- a/inference-engine/samples/benchmark_app/README.md
+++ b/inference-engine/samples/benchmark_app/README.md
@@ -74,49 +74,51 @@ InferenceEngine:
benchmark_app [OPTION]
Options:
- -h, --help Print a usage message
- -m "" Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with a trained compiled model.
- -i "" Optional. Path to a folder with images and/or binaries or to specific image or binary file.
- -d "" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
- Use "-d HETERO:" format to specify HETERO plugin.
- Use "-d MULTI:" format to specify MULTI plugin.
+ -h, --help Print a usage message
+ -m "" Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with a trained compiled model.
+ -i "" Optional. Path to a folder with images and/or binaries or to specific image or binary file.
+ -d "" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
+ Use "-d HETERO:" format to specify HETERO plugin.
+ Use "-d MULTI:" format to specify MULTI plugin.
The application looks for a suitable plugin for the specified device.
- -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
+ -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
Or
- -c "" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
- -api "" Optional. Enable Sync/Async API. Default value is "async".
- -niter "" Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
- -nireq "" Optional. Number of infer requests. Default value is determined automatically for a device.
- -b "" Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
- -stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
- -t Optional. Time, in seconds, to execute topology.
- -progress Optional. Show progress bar (can affect performance measurement). Default values is "false".
- -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
- -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
+ -c "" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
+ -api "" Optional. Enable Sync/Async API. Default value is "async".
+ -niter "" Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
+ -nireq "" Optional. Number of infer requests. Default value is determined automatically for a device.
+ -b "" Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
+ -stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
+ -t Optional. Time, in seconds, to execute topology.
+ -progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
+ -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
+ -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
CPU-specific performance options:
- -nstreams "" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices
- (for HETERO and MULTI device cases use format :,: or just ).
- Default value is determined automatically for a device.
- Please note that although the automatic selection usually provides a reasonable performance,
- it still may be non-optimal for some cases, especially for very small networks.
- Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency
- estimations the number of streams should be set to 1.
- -nthreads "" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
- -enforcebf16 Optional. Enforcing of floating point operations execution in bfloat16 precision on platforms with native bfloat16 support. By default, this key sets "true" on platforms with native bfloat16 support and "false" for other platforms. Use "-enforcebf16=false" to disable this feature.
- -pin "YES"/"NO"/"NUMA" Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU threads pinning for CPU-involved inference.
- -ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
- -op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
- -iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
+ -nstreams "" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices
+ (for HETERO and MULTI device cases use format :,: or just ).
+ Default value is determined automatically for a device.
+ Please note that although the automatic selection usually provides a reasonable performance,
+ it still may be non-optimal for some cases, especially for very small networks.
+ Also, using nstreams>1 is an inherently throughput-oriented option, while for the best-latency
+ estimations the number of streams should be set to 1.
+ -nthreads "" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
+ -enforcebf16="" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform.
+ 'true' - enable bfloat16 regardless of platform support
+ 'false' - disable bfloat16 regardless of platform support.
+ -pin "YES"/"NO"/"NUMA" Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU threads pinning for CPU-involved inference.
+ -ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
+ -op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
+ -iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
Statistics dumping options:
- -report_type "" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
- -report_folder Optional. Path to a folder where statistics report is stored.
- -exec_graph_path Optional. Path to a file where to store executable graph information serialized.
- -pc Optional. Report performance counters.
- -dump_config Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.
- -load_config Optional. Path to XML/YAML/JSON file to load custom IE parameters. Please note, command line parameters have higher priority then parameters from configuration file.
+ -report_type "" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
+ -report_folder Optional. Path to a folder where statistics report is stored.
+ -exec_graph_path Optional. Path to a file where to store executable graph information serialized.
+ -pc Optional. Report performance counters.
+ -dump_config Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.
+ -load_config Optional. Path to XML/YAML/JSON file to load custom IE parameters. Please note, command line parameters have higher priority than parameters from the configuration file.
```
Running the application with the empty list of options yields the usage message given above and an error message.
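For reference, the new `-enforcebf16=<true|false>` switch has a programmatic counterpart; a minimal Python sketch, assuming the CPU plugin exposes an `ENFORCE_BF16` config key with `"YES"`/`"NO"` values:

```python
from openvino.inference_engine import IECore

ie = IECore()
# Roughly equivalent to running benchmark_app with -enforcebf16=false
ie.set_config({"ENFORCE_BF16": "NO"}, "CPU")
```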
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp
index bfe75ccf7b6392..66f9d0b2224161 100644
--- a/inference-engine/samples/benchmark_app/benchmark_app.hpp
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -51,7 +51,9 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
"while for the best-latency estimations the number of streams should be set to 1.";
/// @brief message for enforcing of BF16 execution where it is possible
-static const char enforce_bf16_message[] = "Optional. Enforcing of floating point operations execution in bfloat16 precision where it is acceptable.";
+static const char enforce_bf16_message[] = "Optional. By default, floating point operations are executed in bfloat16 precision on platforms that natively support it.\n"
+ " 'true' - enable bfloat16 regardless of platform support\n"
+ " 'false' - disable bfloat16 regardless of platform support";
/// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
@@ -249,7 +251,7 @@ static void showUsage() {
std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl;
- std::cout << " -enforcebf16 " << enforce_bf16_message << std::endl;
+ std::cout << " -enforcebf16= " << enforce_bf16_message << std::endl;
std::cout << " -pin \"YES\"/\"NO\"/\"NUMA\" " << infer_threads_pinning_message << std::endl;
std::cout << std::endl << " Statistics dumping options:" << std::endl;
std::cout << " -report_type \"\" " << report_type_message << std::endl;
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 4910d5949984ca..ed2153c2bd9ba0 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -378,18 +378,21 @@ int main(int argc, char *argv[]) {
topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
- // ----------------- 6. Configuring input ----------------------------------------------------------------------
+ // ----------------- 6. Configuring inputs and outputs ----------------------------------------------------------------------
next_step();
- for (auto& item : inputInfo) {
- if (app_inputs_info.at(item.first).isImage()) {
- /** Set the precision of input data provided by the user, should be called before load of the network to the device **/
+ processPrecision(cnnNetwork, FLAGS_ip, FLAGS_op, FLAGS_iop);
+ for (auto& item : cnnNetwork.getInputsInfo()) {
+ // if the user specified a precision for this input, propagate it to app_inputs_info
+ // otherwise, if the input is an image, default to U8
+ if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos) {
+ app_inputs_info.at(item.first).precision = item.second->getPrecision();
+ } else if (app_inputs_info.at(item.first).isImage()) {
app_inputs_info.at(item.first).precision = Precision::U8;
item.second->setPrecision(app_inputs_info.at(item.first).precision);
}
}
- processPrecision(cnnNetwork, FLAGS_ip, FLAGS_op, FLAGS_iop);
printInputAndOutputsInfo(cnnNetwork);
// ----------------- 7. Loading the model to the device --------------------------------------------------------
diff --git a/inference-engine/samples/build_samples.sh b/inference-engine/samples/build_samples.sh
index 91354e3913d6eb..d584a11011985d 100755
--- a/inference-engine/samples/build_samples.sh
+++ b/inference-engine/samples/build_samples.sh
@@ -14,7 +14,7 @@ error() {
}
trap 'error ${LINENO}' ERR
-SAMPLES_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+SAMPLES_PATH="$( cd "$( dirname "${BASH_SOURCE[0]-$0}" )" && pwd )"
printf "\nSetting environment variables for building samples...\n"
diff --git a/inference-engine/samples/classification_sample_async/classification_sample_async.h b/inference-engine/samples/classification_sample_async/classification_sample_async.h
index 6696ab85c53ca6..07e6895d8ea6c6 100644
--- a/inference-engine/samples/classification_sample_async/classification_sample_async.h
+++ b/inference-engine/samples/classification_sample_async/classification_sample_async.h
@@ -48,6 +48,7 @@ DEFINE_string(i, "", image_message);
/// @brief Define parameter for set model file
/// It is a required parameter
DEFINE_string(m, "", model_message);
+DEFINE_string(m2, "", model_message);
/// @brief device the target device to infer on
DEFINE_string(d, "CPU", target_device_message);
diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp
index 11a90ad09700c3..133a8ad3a9955d 100644
--- a/inference-engine/samples/speech_sample/main.cpp
+++ b/inference-engine/samples/speech_sample/main.cpp
@@ -263,11 +263,6 @@ float StdDevError(score_error_t error) {
- (error.sumError / error.numScores) * (error.sumError / error.numScores)));
}
-float StdDevRelError(score_error_t error) {
- return (sqrt(error.sumSquaredRelError / error.numScores
- - (error.sumRelError / error.numScores) * (error.sumRelError / error.numScores)));
-}
-
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#ifdef _WIN32
#include
@@ -579,23 +574,24 @@ int main(int argc, char *argv[]) {
// --------------------------- 1. Load inference engine -------------------------------------
slog::info << "Loading Inference Engine" << slog::endl;
Core ie;
+ CNNNetwork network;
+ ExecutableNetwork executableNet;
/** Printing device version **/
slog::info << "Device info: " << slog::endl;
std::cout << ie.GetVersions(deviceStr) << std::endl;
// -----------------------------------------------------------------------------------------------------
- // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
+ // --------------------------- 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
slog::info << "Loading network files" << slog::endl;
- CNNNetwork network;
if (!FLAGS_m.empty()) {
/** Read network model **/
network = ie.ReadNetwork(FLAGS_m);
CheckNumberOfInputs(network.getInputsInfo().size(), numInputArkFiles);
// -------------------------------------------------------------------------------------------------
- // --------------------------- 3. Set batch size ---------------------------------------------------
+ // --------------------------- Set batch size ---------------------------------------------------
/** Set batch size. Unlike in imaging, batching in time (rather than space) is done for speech recognition. **/
network.setBatchSize(batchSize);
slog::info << "Batch size is " << std::to_string(network.getBatchSize())
@@ -604,7 +600,7 @@ int main(int argc, char *argv[]) {
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 4. Set parameters and scale factors -------------------------------------
+ // --------------------------- Set parameters and scale factors -------------------------------------
/** Setting parameter for per layer metrics **/
std::map<std::string, std::string> gnaPluginConfig;
std::map<std::string, std::string> genericPluginConfig;
@@ -678,7 +674,7 @@ int main(int argc, char *argv[]) {
gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me);
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 5. Write model to file --------------------------------------------------
+ // --------------------------- Write model to file --------------------------------------------------
// Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit)
if (!FLAGS_we.empty()) {
gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE] = FLAGS_we;
@@ -686,14 +682,13 @@ int main(int argc, char *argv[]) {
}
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 6. Loading model to the device ------------------------------------------
+ // --------------------------- 3. Loading model to the device ------------------------------------------
if (useGna) {
genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
}
auto t0 = Time::now();
std::vector<std::string> outputs;
- ExecutableNetwork executableNet;
if (!FLAGS_oname.empty()) {
std::vector output_names = ParseBlobName(FLAGS_oname);
@@ -726,7 +721,7 @@ int main(int argc, char *argv[]) {
ms loadTime = std::chrono::duration_cast(Time::now() - t0);
slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
- // --------------------------- 7. Exporting gna model using InferenceEngine AOT API---------------------
+ // --------------------------- Exporting gna model using InferenceEngine AOT API---------------------
if (!FLAGS_wg.empty()) {
slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl;
t0 = Time::now();
@@ -744,13 +739,17 @@ int main(int argc, char *argv[]) {
return 0;
}
+
+ // --------------------------- 4. Create infer request --------------------------------------------------
std::vector inferRequests((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
for (auto& inferRequest : inferRequests) {
inferRequest = {executableNet.CreateInferRequest(), -1, batchSize};
}
- // -----------------------------------------------------------------------------------------------------
+ // ---------------------------------------------------------------------------------------------------------
- // --------------------------- 8. Prepare input blobs --------------------------------------------------
+ // --------------------------- 5. Configure input & output --------------------------------------------------
+
+ //--- Prepare input blobs ----------------------------------------------
/** Taking information about all topology inputs **/
ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
CheckNumberOfInputs(cInputInfo.size(), numInputArkFiles);
@@ -788,9 +787,9 @@ int main(int argc, char *argv[]) {
item.second->setPrecision(inputPrecision);
}
- // -----------------------------------------------------------------------------------------------------
+ // ---------------------------------------------------------------------
- // --------------------------- 9. Prepare output blobs -------------------------------------------------
+ //--- Prepare output blobs ---------------------------------------------
ConstOutputsDataMap cOutputInfo(executableNet.GetOutputsInfo());
OutputsDataMap outputInfo;
if (!FLAGS_m.empty()) {
@@ -821,9 +820,10 @@ int main(int argc, char *argv[]) {
Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs
outData->setPrecision(outputPrecision);
}
+ // ---------------------------------------------------------------------
// -----------------------------------------------------------------------------------------------------
- // --------------------------- 10. Do inference --------------------------------------------------------
+ // --------------------------- 6. Do inference --------------------------------------------------------
std::vector<std::string> output_name_files;
std::vector<std::string> reference_name_files;
size_t count_file = 1;
@@ -854,6 +854,7 @@ int main(int argc, char *argv[]) {
state.Reset();
}
+ /** Process each utterance **/
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map utterancePerfMap;
std::string uttName;
@@ -867,6 +868,7 @@ int main(int argc, char *argv[]) {
slog::info << "Number scores per frame : " << numScoresPerFrame << slog::endl;
+ /** Get information from the ark file for the current utterance **/
numFrameElementsInput.resize(numInputArkFiles);
for (size_t i = 0; i < inputArkFiles.size(); i++) {
std::vector ptrUtterance;
@@ -905,6 +907,7 @@ int main(int argc, char *argv[]) {
ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float));
if (!FLAGS_r.empty()) {
+ /** Read ark file with reference scores **/
std::string refUtteranceName;
GetKaldiArkInfo(reference_name_files[next_output].c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance);
ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance);
@@ -950,6 +953,7 @@ int main(int argc, char *argv[]) {
}
bool inferRequestFetched = false;
+ /** Start inference loop **/
for (auto &inferRequest : inferRequests) {
if (frameIndex == numFrames) {
numFramesThisBatch = 1;
@@ -969,6 +973,7 @@ int main(int argc, char *argv[]) {
ConstOutputsDataMap newOutputInfo;
if (inferRequest.frameIndex >= 0) {
if (!FLAGS_o.empty()) {
+ /* Prepare output data to be saved to a file later */
outputFrame =
&ptrScores.front() +
numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
@@ -993,6 +998,7 @@ int main(int argc, char *argv[]) {
byteSize);
}
if (!FLAGS_r.empty()) {
+ /** Compare output data with reference scores **/
if (!outputs.empty()) {
newOutputInfo[outputs[next_output]] = cOutputInfo[outputs[next_output]];
} else {
@@ -1029,6 +1035,7 @@ int main(int argc, char *argv[]) {
continue;
}
+ /** Prepare input blobs**/
ptrInputBlobs.clear();
if (FLAGS_iname.empty()) {
for (auto &input : cInputInfo) {
@@ -1063,6 +1070,7 @@ int main(int argc, char *argv[]) {
}
int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r);
+ /** Start inference **/
inferRequest.inferRequest.StartAsync();
inferRequest.frameIndex = index < 0 ? -2 : index;
inferRequest.numFramesThisBatch = numFramesThisBatch;
@@ -1086,6 +1094,7 @@ int main(int argc, char *argv[]) {
}
inferRequestFetched |= true;
}
+ /** Inference for the current frame has finished **/
if (!inferRequestFetched) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
@@ -1103,6 +1112,7 @@ int main(int argc, char *argv[]) {
}
if (!FLAGS_o.empty()) {
+ /* Save output data to file */
bool shouldAppend = (utteranceIndex == 0) ? false : true;
SaveKaldiArkArray(output_name_files[next_output].c_str(), shouldAppend, uttName, &ptrScores.front(),
numFramesArkFile, numScoresPerFrame);
diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt
index c7ac932910bd07..1ba2bc9e98e277 100644
--- a/inference-engine/src/cldnn_engine/CMakeLists.txt
+++ b/inference-engine/src/cldnn_engine/CMakeLists.txt
@@ -40,6 +40,8 @@ target_include_directories(${TARGET_NAME} PRIVATE
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
+
+set_ie_threading_interface_for(clDNN_lib)
# Failed because of OpenCL
# ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp
index eb7359c2625b32..c25ef88d122a31 100644
--- a/inference-engine/src/cldnn_engine/cldnn_config.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp
@@ -11,6 +11,7 @@
#include "ie_api.h"
#include "file_utils.h"
#include "cldnn_itt.h"
+#include <thread>
#ifdef _WIN32
# include
@@ -221,6 +222,20 @@ void Config::UpdateFromMap(const std::map& configMap)
} else {
IE_THROW(NotFound) << "Unsupported KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS flag value: " << val;
}
+ } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS) == 0) {
+ int max_threads = std::max(1, static_cast<int>(std::thread::hardware_concurrency()));
+ try {
+ int val_i = std::stoi(val);
+ if (val_i <= 0 || val_i > max_threads) {
+ n_threads = max_threads;
+ } else {
+ n_threads = val_i;
+ }
+ } catch (const std::exception&) {
+ IE_THROW() << "Wrong value for property key " << CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS << ": " << val
+ << "\nSpecify the number of threads use for build as an integer."
+ << "\nOut of range value will be set as a default value, maximum concurrent threads.";
+ }
} else {
IE_THROW(NotFound) << "Unsupported property key by plugin: " << key;
}
@@ -306,5 +321,6 @@ void Config::adjustKeyMapValues() {
key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams);
key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id;
key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = "";
+ key_config_map[CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS] = std::to_string(n_threads);
}
} // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_config.h b/inference-engine/src/cldnn_engine/cldnn_config.h
index fed2617df8ca5d..756f324cf99739 100644
--- a/inference-engine/src/cldnn_engine/cldnn_config.h
+++ b/inference-engine/src/cldnn_engine/cldnn_config.h
@@ -31,7 +31,8 @@ struct Config {
graph_dumps_dir(""),
sources_dumps_dir(""),
device_id(""),
- kernels_cache_dir("") {
+ kernels_cache_dir(""),
+ n_threads(std::max(static_cast(1), std::thread::hardware_concurrency())) {
adjustKeyMapValues();
}
@@ -56,6 +57,7 @@ struct Config {
std::string sources_dumps_dir;
std::string device_id;
std::string kernels_cache_dir;
+ size_t n_threads;
std::map key_config_map;
};
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 41aabc518e62ba..01ea25f87eead8 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -32,6 +32,7 @@
#include
#include
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
+#include "transformations/common_optimizations/softmax_fusion.hpp"
#include
#include
#include
@@ -323,6 +324,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
return false;
});
+ pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
+ [](const_node_ptr &node) -> bool {
+ return node->input_value(0).get_partial_shape().rank().get_length() > 5;
+ });
+
// List of enabled/disabled transformations
pass_config->disable();
pass_config->disable();
@@ -488,7 +494,8 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
context_config.tuningConfig.mode == current_config.tuningConfig.mode &&
context_config.tuningConfig.cache_file_path == current_config.tuningConfig.cache_file_path &&
context_config.kernels_cache_dir == current_config.kernels_cache_dir &&
- context_config.device_id == current_config.device_id;
+ context_config.device_id == current_config.device_id &&
+ context_config.n_threads == current_config.n_threads;
};
{
diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
index 54ec6f5eb798f6..95dc67da5d5798 100644
--- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
+++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
@@ -193,10 +193,10 @@ REGISTER_FACTORY(v5, LogSoftmax);
REGISTER_FACTORY(v5, LSTMSequence);
//REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> v5 internal conversion
REGISTER_FACTORY(v5, Round);
+REGISTER_FACTORY(v5, GatherND);
// ----------------------------- Unsupported v5 ops ----------------------------- //
// REGISTER_FACTORY(v5, BatchNormInference);
-// REGISTER_FACTORY(v5, GatherND);
// REGISTER_FACTORY(v5, GRUSequence);
// REGISTER_FACTORY(v5, Loop);
// REGISTER_FACTORY(v5, RNNSequence);
diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
index e73f9d7451cfe7..f03db1c4834e0d 100644
--- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
@@ -267,7 +267,8 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr& op) {
+ p.ValidateInputs(op, {2});
+ auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+ std::string layerName = layer_type_name_ID(op);
+
+ int32_t indices_rank = static_cast<int32_t>(op->get_input_shape(1).size());
+
+ auto batch_dims = op->get_batch_dims();
+
+ auto primitive = cldnn::gather_nd(layerName,
+ inputPrimitives[0],
+ inputPrimitives[1],
+ indices_rank,
+ batch_dims);
+
+ p.AddPrimitive(primitive);
+ p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v5, GatherND);
+
+} // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/ops/interpolate.cpp b/inference-engine/src/cldnn_engine/ops/interpolate.cpp
index 4212459d5798df..f9241b8ca0fd75 100644
--- a/inference-engine/src/cldnn_engine/ops/interpolate.cpp
+++ b/inference-engine/src/cldnn_engine/ops/interpolate.cpp
@@ -124,14 +124,7 @@ void CreateInterpolateOp(Program& p, const std::shared_ptr
- // WA for v3->v4 Interpolate conversion
- // This WA must be removed as soon as optimized kernel supports linear mode
- auto input_shape_rank = op->get_input_shape(0).size();
auto mode = attrs.mode;
- if (mode == ngraph::op::v4::Interpolate::InterpolateMode::linear && input_shape_rank < 5) {
- mode = ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx;
- }
-
auto cldnnSampleType = GetResampleType(mode);
auto shapeCalcMode = GetShapeCalculationMode(attrs.shape_calculation_mode);
auto coordTransMode = GetCoordinateTransformationMode(attrs.coordinate_transformation_mode);
diff --git a/inference-engine/src/cldnn_engine/ops/rnn.cpp b/inference-engine/src/cldnn_engine/ops/rnn.cpp
index b7f3b3f842901d..5801ef91549d67 100644
--- a/inference-engine/src/cldnn_engine/ops/rnn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/rnn.cpp
@@ -153,11 +153,19 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptrget_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
+ cldnn::tensor outSz = cldnn::tensor{ lstm_batch_size, lstm_hidden_size, 1, 1 };
+ cldnn::primitive_id outputHiddenCropID = layerName + "_hc";
cldnn::primitive_id outputHiddenID = layerName + ".0";
- p.AddPrimitive(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
+ p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
+ p.AddInnerPrimitiveToProfiler(outputHiddenCropID, op->get_friendly_name(), op);
+ p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz));
p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);
+
+ cldnn::primitive_id outputCellCropID = layerName + "_cc";
cldnn::primitive_id outputCellID = layerName + ".1";
- p.AddPrimitive(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
+ p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz));
+ p.AddInnerPrimitiveToProfiler(outputCellCropID, op->get_friendly_name(), op);
+ p.AddPrimitive(cldnn::reshape(outputCellID, outputHiddenCropID, outSz));
p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);
// output primitive IDs
diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index e3b0f417ff0f3c..6257d8da47d7c9 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -824,20 +824,38 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out);
if (num_bytes_per_weight == 1) {
- int8_t *ptr_weight = reinterpret_cast<int8_t*>(component[i].op.affine.ptr_weights);
- gna_compound_bias_t *ptr_bias = reinterpret_cast<gna_compound_bias_t*>(component[i].op.affine.ptr_biases);
+ if (num_bytes_per_bias != 1) {
+ int8_t* ptr_weight = reinterpret_cast<int8_t*>(component[i].op.affine.ptr_weights);
+ gna_compound_bias_t* ptr_bias = reinterpret_cast<gna_compound_bias_t*>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
- for (uint32_t row = 0; row < num_weight_rows; row++) {
- for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (logging_precision == kDnnFloat) {
- float val =
- static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[row].multiplier
+ for (uint32_t row = 0; row < num_weight_rows; row++) {
+ for (uint32_t col = 0; col < num_weight_columns; col++) {
+ if (logging_precision == kDnnFloat) {
+ float val =
+ static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[row].multiplier
/ weight_scale_factor;
- out_wfile << std::setprecision(4) << val << " ";
- } else {
- out_wfile << int((int8_t) ptr_weight[row * num_weight_columns + col]) << " ";
+ out_wfile << std::setprecision(4) << val << " ";
+ } else {
+ out_wfile << int((int8_t)ptr_weight[row * num_weight_columns + col]) << " ";
+ }
+ out_wfile << "\n";
+ }
+ }
+#endif
+ } else {
+ int8_t* ptr_weight = reinterpret_cast<int8_t*>(component[i].op.affine.ptr_weights);
+#ifdef DUMP_WB
+ for (uint32_t row = 0; row < num_weight_rows; row++) {
+ for (uint32_t col = 0; col < num_weight_columns; col++) {
+ if (logging_precision == kDnnFloat) {
+ float val =
+ static_cast<float>(ptr_weight[row * num_weight_columns + col]) / weight_scale_factor;
+ out_wfile << std::setprecision(4) << val << " ";
+ } else {
+ out_wfile << int((int8_t)ptr_weight[row * num_weight_columns + col]) << " ";
+ }
+ out_wfile << "\n";
}
- out_wfile << "\n";
}
}
#endif
@@ -873,18 +891,31 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
}
if (compute_precision_ == kDnnInt) {
if (num_bytes_per_weight == 1) {
- gna_compound_bias_t
- *ptr_biases = reinterpret_cast<gna_compound_bias_t*>(component[i].op.affine.ptr_biases);
+ if (num_bytes_per_bias != 1) {
+ gna_compound_bias_t
+ * ptr_biases = reinterpret_cast<gna_compound_bias_t*>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
- for (uint32_t row = 0; row < num_rows_out; row++) {
- if (logging_precision == kDnnInt) {
- out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
- out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
- } else {
- out_bfile << std::setw(8) << ptr_biases[row].bias / output_scale_factor << "\n";
+ for (uint32_t row = 0; row < num_rows_out; row++) {
+ if (logging_precision == kDnnInt) {
+ out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
+ out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
+ } else {
+ out_bfile << std::setw(8) << ptr_biases[row].bias / output_scale_factor << "\n";
+ }
}
- }
#endif
+ } else {
+ int8_t *ptr_biases = reinterpret_cast<int8_t*>(component[i].op.affine.ptr_biases);
+#ifdef DUMP_WB
+ for (uint32_t row = 0; row < num_rows_out; row++) {
+ if (logging_precision == kDnnInt) {
+ out_bfile << std::setw(8) << ptr_biases[row] << "\n";
+ } else {
+ out_bfile << std::setw(8) << ptr_biases[row] / output_scale_factor << "\n";
+ }
+ }
+#endif
+ }
} else {
int32_t *ptr_biases = reinterpret_cast<int32_t*>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
@@ -2102,9 +2133,12 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
} else {
floatValue = reinterpret_cast(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
}
- } else {
+ } else if (component[i].num_bytes_per_output == 2) {
auto value = reinterpret_cast<int16_t*>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
floatValue = static_cast<float>(value);
+ } else {
+ auto value = reinterpret_cast<int8_t*>(component[i].ptr_outputs)[k * component[i].num_columns_out + j];
+ floatValue = static_cast<float>(value);
}
floatValue /= component[i].output_scale_factor;
out_file << std::setw(8) << floatValue << "\n";
@@ -2142,10 +2176,14 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
} else {
floatValue = reinterpret_cast(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
}
- } else {
+ } else if (component[i].num_bytes_per_input == 2) {
auto value = reinterpret_cast<int16_t*>(component[i].ptr_inputs)[k * component[i].num_columns_in+ j];
floatValue = static_cast<float>(value);
+ } else {
+ auto value = reinterpret_cast<int8_t*>(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
+ floatValue = static_cast<float>(value);
}
+
in_file << std::setw(8) << floatValue / input_scale_factor << "\n";
}
}
diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
index f293b7110cfc47..975397362839d9 100644
--- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
+++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
@@ -13,6 +13,8 @@ constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convEachKernelByteAlignment = 16;
+constexpr uint32_t noOfInputsDivisor = 8;
+constexpr uint32_t noOfInputsLowPrecDivisor = 16;
}
} // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp
index c63ab7a314526b..6190a89540fbec 100644
--- a/inference-engine/src/gna_plugin/backend/make_pwl.cpp
+++ b/inference-engine/src/gna_plugin/backend/make_pwl.cpp
@@ -18,6 +18,7 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
+ const bool low_precision,
std::vector &gna_pwl) {
pwl_gna_slope_scale_t s;
uint32_t pwl_size = static_cast(pwl.size());
@@ -230,7 +231,7 @@ void make_gna_pwl(const DnnActivation fun,
gnalog() << "=========================== LeakyReLU Segments ======================\n";
int32_t x_lower = INT32_MIN;
int32_t x_upper = INT32_MAX;
- int16_t y_lower = INT16_MIN;
+ int16_t y_lower = low_precision ? INT8_MIN : INT16_MIN;
int16_t y_upper = INT16_MAX;
if (fun.fqParams.set) {
x_lower = FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * in_scale);
diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.hpp b/inference-engine/src/gna_plugin/backend/make_pwl.hpp
index eef981034ed2ce..62d95210906d18 100644
--- a/inference-engine/src/gna_plugin/backend/make_pwl.hpp
+++ b/inference-engine/src/gna_plugin/backend/make_pwl.hpp
@@ -15,4 +15,5 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
+ const bool low_precision,
std::vector &gna_pwl);
diff --git a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp
index 1669fe050fc079..e55e36a5f1a657 100644
--- a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp
+++ b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp
@@ -18,5 +18,6 @@ struct GNAFlags {
bool sw_fp32 = false;
bool fake_quantized = false;
bool performance_counting = false;
+ bool input_low_precision = false;
};
} // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp
index 3fc2d49afb6df7..8095d9cf4ddd3e 100644
--- a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp
+++ b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp
@@ -18,7 +18,11 @@ size_t InputDesc::minBytesRequiredForStoreInput(CNNLayerPtr layer) {
auto quantized = getInjectedData(layer);
size_t precision_bytes;
if (quantized) {
- precision_bytes = 2;
+ if (quantized->lowPrecision) {
+ precision_bytes = 1;
+ } else {
+ precision_bytes = 2;
+ }
} else {
precision_bytes = 4;
}
diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index ae0edf28013240..ac1c6bdf47a51a 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -25,6 +25,7 @@ namespace frontend {
/**
* @brief description of quantisation precision
* @tparam Ip - input precision
+ * @tparam Op - output precision
* @tparam Wp - weights precision
* @tparam Bp - biases precision
* @tparam Np - network precision - can be auto generated in future
@@ -82,6 +83,12 @@ struct QuantI8 : public QuantDescTmpl {
+ QuantI8_I8() {
+ _Np = InferenceEngine::Precision::MIXED;
+ }
+};
// for support proper trait instantiation for quantization function callback
struct FakeQuantI16 : public QuantI16 {};
@@ -155,6 +162,17 @@ class Quant {
}
};
+template<>
+class Quant<QuantI8_I8> {
+public:
+ template<class ...Args>
+ void operator()(Args && ... args) const {
+ QuantizationCallback<int8_t, int8_t> {
+ std::forward<Args>(args)...
+ }.runQuantize();
+ }
+};
+
template<>
class Quant {
public:
@@ -393,13 +411,12 @@ inline void quantizeWeightsBiasesConv(const QuantDesc & quantDesc,
<< "cannot copy weights for layer :"<< conv->name << " of size" << intWeights->byteSize();
}
- auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) {
+ auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) -> size_t {
if (wl->_biases) {
return wl->_biases->size();
}
- // calculating biases len using outdata dims
- auto & dims = wl->outData.front()->getDims();
- return dims[1];
+ // calculating biases len using outdata dims: biases number should be equal to output channels number
+ return InferenceEngine::GetDataDimSize(wl->outData.front(), InferenceEngine::DataDimName::C);
};
using BiasesPrecision = typename QuantDesc::BiasesPrecision;
@@ -651,8 +668,8 @@ template
class DataQuantizer : public DataQuantizerBase {
public:
explicit DataQuantizer(float scaleFactor) : DataQuantizerBase(scaleFactor) {}
- bool operator()(InferenceEngine::WeightableLayer *wl) const {
- quantizeWeightsBiasesConv(Desc::optional(), wl, Quant());
+ bool operator()(InferenceEngine::ConvolutionLayer *cl) const {
+ quantizeWeightsBiasesConv(Desc::optional(), cl, Quant());
return true;
}
};
@@ -661,8 +678,8 @@ template
class DataQuantizer : public DataQuantizerBase {
public:
explicit DataQuantizer(float scaleFactor) : DataQuantizerBase(scaleFactor) {}
- bool operator()(InferenceEngine::ScaleShiftLayer *wl) const {
- quantizeWeightsBiases(Desc::optional(), wl, Quant(), true);
+ bool operator()(InferenceEngine::ScaleShiftLayer *ssl) const {
+ quantizeWeightsBiases(Desc::optional(), ssl, Quant(), true);
return true;
}
};
@@ -681,6 +698,7 @@ class LayersQuantizer : public frontend::DataQuantizerBase {
using QuantI16 = frontend::QuantPair;
using QuantI8 = frontend::QuantPair;
+using QuantI8_I8 = frontend::QuantPair;
using FakeQuantI16 = frontend::QuantPair;
diff --git a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
index 46b000e35df2ba..1f3f125a029172 100644
--- a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
@@ -26,7 +26,7 @@ template
class ModelQuantizer {
public:
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork &model, float scaleFactor) const {
- return quantize(model, [](const InferenceEngine::CNNNetwork &, bool runBeforeCopy){}, std::vector<float>({scaleFactor}));
+ return quantize(model, [](const InferenceEngine::CNNNetwork &, bool runBeforeCopy, bool lowPrecision){}, std::vector<float>({scaleFactor}));
}
template
@@ -35,7 +35,7 @@ class ModelQuantizer {
}
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork &model, std::vector<float> scaleFactor) const {
- return quantize(model, [](InferenceEngine::CNNNetwork &, bool runBeforeCopy){}, scaleFactor);
+ return quantize(model, [](InferenceEngine::CNNNetwork &, bool runBeforeCopy, bool lowPrecision){}, scaleFactor);
}
template
@@ -45,14 +45,15 @@ class ModelQuantizer {
transformLayer(newLayer, WeightsConverter());
return newLayer;
};
+ bool lowPrecision = (T::mandatory().getInputPrecision().size() == sizeof(uint8_t));
InferenceEngine::CNNNetwork copiedNet = InferenceEngine::CNNNetCopy(model);
- cb(copiedNet, true);
+ cb(copiedNet, true, lowPrecision);
copiedNet = InferenceEngine::CNNNetCopy(copiedNet, visitor);
// allow client code to access copied topology, to avoid copies if user would like to chain quantisation with
// another preprocessing
- cb(copiedNet, false);
+ cb(copiedNet, false, lowPrecision);
if (scaleFactor.empty()) {
THROW_GNA_EXCEPTION << "Scale factor is empty";
@@ -62,6 +63,8 @@ class ModelQuantizer {
auto sortedNewNet = InferenceEngine::details::CNNNetSortTopologically(copiedNet);
gnalog() << "Sorted layers: " << std::endl;
for (auto &&layer : sortedNewNet) {
+ auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+ quantData->lowPrecision = lowPrecision;
gnalog() << layer->name << std::endl;
}
/// filling scale factors for input layers, memory layers will have scaleFactor of 1.0 by default
@@ -79,7 +82,8 @@ class ModelQuantizer {
}
bool isFakeQuantize = std::is_same() || std::is_same();
- propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), isFakeQuantize);
+ propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), T::optional().getWeightsPrecision().size(),
+ T::mandatory().getInputPrecision().size(), isFakeQuantize);
// sorted order gives possibility for propagate quantisation along depended layers
for (auto &&layer : sortedNewNet) {
@@ -90,8 +94,9 @@ class ModelQuantizer {
}
private :
- void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> & net, int weightsBytesSize, bool fakeQuantize) const {
- ScaleFactorCalculator sf(net, weightsBytesSize, fakeQuantize);
+ void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> & net, int mandWeightsBytesSize,
+ int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize) const {
+ ScaleFactorCalculator sf(net, mandWeightsBytesSize, optWeightsBytesSize, inputsBytesSize, fakeQuantize);
while (!sf.allLayersProcessed()) {
for (auto &&layer : sf.getStartLayers()) {
diff --git a/inference-engine/src/gna_plugin/frontend/quantization.cpp b/inference-engine/src/gna_plugin/frontend/quantization.cpp
index df060354f09edb..69dcc1ccb586d8 100644
--- a/inference-engine/src/gna_plugin/frontend/quantization.cpp
+++ b/inference-engine/src/gna_plugin/frontend/quantization.cpp
@@ -358,7 +358,6 @@ void QuantizationCallback::runQuantize() const {
int8_t *ptr_weight_8 = ptr_int_weights + (row * num_columns_padded + col);
rounding_value = (ptr_float_weights[row * num_columns + col] > 0) ? 0.5f : -0.5f;
-
value = ptr_float_weights[row * num_columns + col] * (*ptr_weight_scale_factor / ptr_int_biases[row].multiplier) + rounding_value;
if (value > 127.0) {
*ptr_weight_8 = 127;
@@ -404,3 +403,57 @@ void QuantizationCallback::runQuantize() const {
QUANTWARNING("Warning: %d / %d saturations in QuantizeAffine8()\n", num_saturate, num_rows * num_columns + num_rows);
}
}
+
+template<>
+void QuantizationCallback<int8_t, int8_t>::runQuantize() const {
+ uint32_t num_saturate = 0;
+ for (uint32_t row = 0; row < num_rows; row++) {
+ for (uint32_t col = 0; col < num_columns; col++) {
+ float rounding_value = (ptr_float_weights[row * num_columns + col] > 0) ? 0.5f : -0.5f;
+ float value = ptr_float_weights[row * num_columns + col] * *ptr_weight_scale_factor + rounding_value;
+ int8_t* ptr_weight_8 = ptr_int_weights + (row * num_columns_padded + col);
+ if (value > 127.0) {
+ *ptr_weight_8 = 127;
+ num_saturate++;
+ } else if (value < -128.0) {
+ *ptr_weight_8 = -128;
+ num_saturate++;
+ } else {
+ *ptr_weight_8 = (int8_t)value;
+ }
+ }
+ for (uint32_t col = num_columns; col < num_columns_padded; col++) {
+ int8_t* ptr_weight_8 = ptr_int_weights + (row * num_columns_padded + col);
+ *ptr_weight_8 = 0;
+ }
+ }
+ for (uint32_t row = num_rows; row < num_rows_padded; row++) {
+ for (uint32_t col = 0; col < num_columns_padded; col++) {
+ int8_t* ptr_weight_8 = ptr_int_weights + (row * num_columns_padded + col);
+ *ptr_weight_8 = 0;
+ }
+ }
+
+ if (ptr_float_biases != nullptr && ptr_int_biases != nullptr) {
+ for (uint32_t j = 0; j < num_rows; j++) {
+ float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
+ float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
+ if (value > 127.0) {
+ ptr_int_biases[j] = 127;
+ num_saturate++;
+ } else if (value < -128.0) {
+ ptr_int_biases[j] = -128;
+ num_saturate++;
+ } else {
+ ptr_int_biases[j] = (int8_t)value;
+ }
+ }
+ for (uint32_t j = num_rows; j < num_rows_padded; j++) {
+ ptr_int_biases[j] = 0;
+ }
+ }
+
+ if (num_saturate > 0) {
+ QUANTWARNING("Warning: %d / %d saturations in QuantizeAffine8_8()\n", num_saturate, num_rows * num_columns + num_rows);
+ }
+}
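// Illustration (not part of the patch): the core of the new int8/int8 runQuantize above
// is "multiply by the scale factor, round half away from zero, saturate to the int8 range,
// and count saturations". A standalone sketch of that inner loop, with an illustrative
// helper name and without the row/column padding handling of the real callback:
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static uint32_t quantize_i8(const float* src, int8_t* dst, std::size_t n, float scale) {
    uint32_t saturations = 0;
    for (std::size_t i = 0; i < n; ++i) {
        float rounding = (src[i] > 0) ? 0.5f : -0.5f;   // round half away from zero
        float value = src[i] * scale + rounding;
        if (value > 127.0f) {
            dst[i] = 127;
            ++saturations;
        } else if (value < -128.0f) {
            dst[i] = -128;
            ++saturations;
        } else {
            dst[i] = static_cast<int8_t>(value);
        }
    }
    return saturations;
}

int main() {
    std::vector<float> weights = {0.01f, -0.73f, 5.0f};
    std::vector<int8_t> quantized(weights.size());
    uint32_t sat = quantize_i8(weights.data(), quantized.data(), weights.size(), 64.0f);
    // with scale 64: 0.01 -> 1, -0.73 -> -47, 5.0 saturates to 127
    std::printf("q = [%d, %d, %d], saturations = %u\n",
                quantized[0], quantized[1], quantized[2], static_cast<unsigned>(sat));
}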
diff --git a/inference-engine/src/gna_plugin/frontend/quantization.h b/inference-engine/src/gna_plugin/frontend/quantization.h
index 7817b66da297a5..4aaebebe8f6d68 100644
--- a/inference-engine/src/gna_plugin/frontend/quantization.h
+++ b/inference-engine/src/gna_plugin/frontend/quantization.h
@@ -13,6 +13,8 @@
#define MAX_OUT_MULTIPLIER 230
#define MAX_VAL_1B_WEIGHT 127
+#define MAX_VAL_1B_FEAT 64
+#define MAX_VAL_1B_BIAS 127
#define MAX_VAL_2B_WEIGHT 16384
#define MAX_VAL_2B_FEAT 16384
#define MAX_VAL_4B_BIAS 1073741824
@@ -45,6 +47,7 @@ struct QuantizationCallback {
template class QuantizationCallback<int16_t, int32_t>;
template class QuantizationCallback<int8_t, gna_compound_bias_t>;
+template class QuantizationCallback<int8_t, int8_t>;
std::pair<float, float> FindMinMaxValues(void* ptr_float_memory, size_t num_elements);
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements);
diff --git a/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp b/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp
index 4de70f711e89db..918ac8ee3d3ec2 100644
--- a/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp
+++ b/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp
@@ -84,8 +84,8 @@ struct QuantizedLayerParams {
// deprecate this
Quantization _weights_quant;
Quantization _bias_quant;
- float _o_shift = 0.0f;
- float _b_shift = 0.0f;
+
+ bool lowPrecision = false;
};
} // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 80280df403a0f7..a2bfaccc00f54a 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -17,6 +17,7 @@
#include "gna_plugin_log.hpp"
#include "gna_slope_scale.h"
#include "runtime/pwl.h"
+#include "gna_data_types.hpp"
namespace GNAPluginNS {
namespace frontend {
@@ -181,14 +182,14 @@ template
class ScaleFactorPerLayer {
public:
/**
- * @brief calculates weights scale factor for fit dynamic range into target bitsize,
+ * @brief calculates weights scale factor to fit dynamic range into target bitsize,
* also calculates output scale factor for the given layer
* @param cnnLayer
* @param weightsSize
* @param result
* @return
*/
- bool operator()(T cnnLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
+ bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
return false;
}
};
@@ -197,6 +198,7 @@ template<>
class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
private :
const float activation_scale_factor = 2048.f;
+ const float low_prec_activation_scale_factor = 4.f;
const float identity_scale_factor = 2049.0f;
const float max_activation_scale_factor = 4096.0f;
const float k = 5;
@@ -206,12 +208,13 @@ class ScaleFactorPerLayer {
protected :
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
+ int inputsSize,
const bool fakeQuantize) {
auto quantizedParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer);
// todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights
// set the initial value
- float result = activation_scale_factor;
+ float result = (inputsSize == 2 ? activation_scale_factor : low_prec_activation_scale_factor);
if (layer.isIdentity()) {
// #define accurate_identity_scale_factor
#ifdef accurate_identity_scale_factor
@@ -246,11 +249,13 @@ class ScaleFactorPerLayer {
result = fabs(scale_extra) > fabs(scale_default) ? identity_scale_factor / 2 : identity_scale_factor;
#endif
- } else if (layer.isRelu() &&
- static_cast(activation_scale_factor * quantizedParams->_src_quant.GetScale())
- > std::numeric_limits::max()-1) {
+ } else if (layer.isRelu()) {
// if activation is one from relu family, we need to apply heuristic to avoid activation output overflow
- result = (activation_scale_factor * 0.5);
+ auto limit = (inputsSize == 1 ? std::numeric_limits::max() : std::numeric_limits::max()) - 1;
+
+ if (static_cast(result * quantizedParams->_src_quant.GetScale()) > limit) {
+ result *= 0.5;
+ }
} else if (layer.isPower()) {
auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer const*>(cnnLayer);
if (!powerLayer) {
@@ -380,7 +385,7 @@ class ScaleFactorPerLayer {
(layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) {
auto prevLayerQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*prevLayer);
if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
- (prevLayer2 == nullptr || LayerInfo(prevLayer2).has16BOutput())) {
+ (prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) {
result = prevLayerQuant->_src_quant.GetScale();
usePrevScaleFactor = true;
}
@@ -411,7 +416,7 @@ class ScaleFactorPerLayer {
}
public :
- bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
+ bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
if ( !cnnLayer ) {
IE_THROW() << "Incorrect Convolutional Layer pointer \n";
}
@@ -543,7 +548,13 @@ class ScaleFactorPerLayer {
}
}
- auto levels = fakeQuantize ? MAX_VAL_2B_FEAT : std::numeric_limits<int16_t>::max();
+ auto levels = 0;
+ if (fakeQuantize) {
+ levels = (inputsSize == 2) ? MAX_VAL_2B_FEAT : MAX_VAL_1B_FEAT;
+ } else {
+ levels = (inputsSize == 2) ? std::numeric_limits<int16_t>::max() : std::numeric_limits<int8_t>::max();
+ }
+
auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
auto scale_val = static_cast<float>(levels) / abs_val;
//TODO: use FQ formula for scale factor calculation
@@ -591,7 +602,7 @@ class ScaleFactorPerLayer {
if (!quant->_dst_quant.IsScaleSet() || fp32eq(quant->_dst_quant.GetScale(), 1.0f) ||
!fp32eq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) {
quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale());
- auto scale = getActivationScale(cnnLayer, layerInfo, fakeQuantize);
+ auto scale = getActivationScale(cnnLayer, layerInfo, inputsSize, fakeQuantize);
quant->_dst_quant.SetScale(scale);
}
return true;
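// Illustration (not part of the patch): both branches of the levels selection above reduce
// to "pick the representable limit from the input element width, then divide it by the
// largest absolute input value". A small standalone sketch, assuming inputsSize is the
// input element width in bytes; the helper name and the literal constants mirroring
// MAX_VAL_2B_FEAT / MAX_VAL_1B_FEAT are illustrative only.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

static float pick_input_scale(float min_val, float max_val, int inputsSize, bool fakeQuantize) {
    int levels = 0;
    if (fakeQuantize) {
        levels = (inputsSize == 2) ? 16384 /* MAX_VAL_2B_FEAT */ : 64 /* MAX_VAL_1B_FEAT */;
    } else {
        levels = (inputsSize == 2) ? std::numeric_limits<int16_t>::max()
                                   : std::numeric_limits<int8_t>::max();
    }
    float abs_val = std::max(std::fabs(max_val), std::fabs(min_val));
    return static_cast<float>(levels) / abs_val;  // wider dynamic range -> smaller scale factor
}

int main() {
    std::printf("16-bit inputs: %.1f\n", pick_input_scale(-2.0f, 2.0f, 2, false));  // 32767 / 2
    std::printf(" 8-bit inputs: %.1f\n", pick_input_scale(-2.0f, 2.0f, 1, false));  // 127 / 2
}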
@@ -612,10 +623,12 @@ class ScaleFactorPerLayer {
template<>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
public:
- bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
+ bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
if ( !eltwiseLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Eltwise Layer pointer \n";
}
+ bool lowPrecision = (inputsSize == sizeof(int8_t));
+
auto in0 = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, 0);
auto in1 = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, 1);
@@ -640,7 +653,7 @@ class ScaleFactorPerLayer