diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index 493d492d04a885..bfed994a137547 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -33,7 +33,7 @@ jobs: INSTALL_DIR: $(WORK_DIR)/install_pkg INSTALL_TEST_DIR: $(INSTALL_DIR)/tests LAYER_TESTS_DIR: $(INSTALL_TEST_DIR)/layer_tests - SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh + SETUPVARS: $(INSTALL_DIR)/setupvars.sh steps: - script: | @@ -103,6 +103,7 @@ jobs: workingDirectory: $(WORK_DIR) displayName: 'Install dependencies' + # Should be after 'Install dependencies' because Git lfs is not installed - checkout: testdata clean: true lfs: true @@ -140,18 +141,18 @@ jobs: - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) displayName: 'Install' - + - task: CMake@1 inputs: cmakeArgs: > -GNinja $(REPO_DIR)/tests/layer_tests workingDirectory: $(BUILD_LAYER_TESTS_DIR) - + - script: ninja workingDirectory: $(BUILD_LAYER_TESTS_DIR) displayName: 'Build Layer Tests' - + - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_LAYER_TESTS_DIR) displayName: 'Install Layer Tests' @@ -166,18 +167,18 @@ jobs: cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/ workingDirectory: $(BUILD_DIR) displayName: 'Install tests' - + - script: ls -alR $(INSTALL_DIR) displayName: 'List install files' - - script: $(INSTALL_DIR)/deployment_tools/inference_engine/samples/cpp/build_samples.sh + - script: $(INSTALL_DIR)/samples/cpp/build_samples.sh workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build cpp samples' - - script: $(INSTALL_DIR)/deployment_tools/inference_engine/samples/c/build_samples.sh + - script: $(INSTALL_DIR)/samples/c/build_samples.sh workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' - + - script: rm -fr $(BUILD_DIR) displayName: 'Clean build dir' continueOnError: false @@ -188,8 +189,8 @@ jobs: continueOnError: false - script: | - export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer - . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml + export MO_ROOT=$(INSTALL_DIR)/tools/model_optimizer + . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml displayName: 'Model Optimizer UT' continueOnError: false @@ -253,11 +254,11 @@ jobs: . $(SETUPVARS) -pyver 3.8 && python3 -m pytest --junitxml=TEST-PythonAPI.xml displayName: 'Python API Tests' continueOnError: false - + - script: | . 
$(SETUPVARS) python3 -m pip install -r requirements.txt - export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer + export MO_ROOT=$(INSTALL_DIR)/tools/model_optimizer export PYTHONPATH=$(LAYER_TESTS_DIR):$PYTHONPATH python3 -m pytest tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=TEST-tf_Roll.xmlTEST workingDirectory: $(LAYER_TESTS_DIR) diff --git a/.ci/azure/linux_conditional_compilation.yml b/.ci/azure/linux_conditional_compilation.yml index a4063d2c9031f0..1a69b7c3dcd9a2 100644 --- a/.ci/azure/linux_conditional_compilation.yml +++ b/.ci/azure/linux_conditional_compilation.yml @@ -17,7 +17,7 @@ jobs: WORK_DIR: $(Pipeline.Workspace)/_w BUILD_DIR: $(WORK_DIR)/build INSTALL_DIR: $(WORK_DIR)/install_pkg - SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh + SETUPVARS: $(INSTALL_DIR)/setupvars.sh steps: - script: | diff --git a/.ci/azure/linux_ngraph_onnx.yml b/.ci/azure/linux_ngraph_onnx.yml index 5521d224630ad7..8218a0874cdd30 100644 --- a/.ci/azure/linux_ngraph_onnx.yml +++ b/.ci/azure/linux_ngraph_onnx.yml @@ -72,7 +72,7 @@ jobs: workingDirectory: $(WORK_DIR) displayName: 'Install dependencies' - - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)" + - script: runtime/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)" displayName: 'Update models' condition: ne(variables['BUILD_TYPE'], 'Debug') diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index 932fb76d5cc8b9..ad5e630820ddbf 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -93,7 +93,6 @@ jobs: -DENABLE_CLDNN=OFF -DENABLE_PROFILING_ITT=OFF -DENABLE_SAMPLES=OFF - -DENABLE_SPEECH_DEMO=OFF -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DNGRAPH_DEBUG_ENABLE=OFF $(REPO_DIR) @@ -111,44 +110,44 @@ jobs: displayName: 'Install' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh CXXFLAGS="-Wno-error=deprecated-declarations" ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR) workingDirectory: $(ONNXRUNTIME_REPO_DIR) displayName: 'Build Lin ONNX Runtime' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh skip_tests=`tr -s '\n ' ':' < $(ONNXRUNTIME_UTILS)/skip_tests` ./onnxruntime_test_all --gtest_filter=-$skip_tests workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run onnxruntime_test_all' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh ./onnxruntime_shared_lib_test workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run onnxruntime_shared_lib_test' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh ./onnxruntime_global_thread_pools_test workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run onnxruntime_global_thread_pools_test' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh ./onnxruntime_api_tests_without_env workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run onnxruntime_api_tests_without_env' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh ./onnx_test_runner "$(ONNXRUNTIME_REPO_DIR)/cmake/external/onnx/onnx/backend/test/data/pytorch-converted" workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run pytorch-converted 
tests' - script: | - source $(INSTALL_DIR)/bin/setupvars.sh + source $(INSTALL_DIR)/setupvars.sh ./onnx_test_runner "$(ONNXRUNTIME_REPO_DIR)/cmake/external/onnx/onnx/backend/test/data/pytorch-operator" workingDirectory: $(ONNXRUNTIME_BUILD_DIR)/RelWithDebInfo displayName: 'Run pytorch-operator tests' diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index b07ff48f78ccc3..4534c08b0651d5 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -28,19 +28,19 @@ jobs: MODELS_PATH: $(REPO_DIR)/../testdata WORK_DIR: $(Pipeline.Workspace)/_w BUILD_DIR: $(WORK_DIR)/build - BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE) INSTALL_DIR: $(WORK_DIR)/install_pkg - SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh + INSTALL_TEST_DIR: $(INSTALL_DIR)/tests + SETUPVARS: $(INSTALL_DIR)/setupvars.sh steps: - script: | whoami uname -a - which python3 - python3 --version - which java - java -version - gcc --version + echo Python3 info ; which python3 ; python3 --version + echo Python info ; which python ; python --version + echo Java info ; which java ; java -version + echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version xcrun --sdk macosx --show-sdk-version env sysctl -a @@ -91,47 +91,64 @@ jobs: workingDirectory: $(BUILD_DIR) displayName: 'CMake' + - script: ls -alR $(REPO_DIR)/inference-engine/temp/ + displayName: 'List temp SDKs' + - script: ninja workingDirectory: $(BUILD_DIR) displayName: 'Build Mac' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) displayName: 'Install' - - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml - workingDirectory: $(BIN_DIR) + - script: ls -alR $(INSTALL_DIR) + displayName: 'List install files' + + - script: | + set -e + mkdir $(INSTALL_DIR)/opencv/ + cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake + cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_osx/opencv/* $(INSTALL_DIR)/opencv/ + workingDirectory: $(BUILD_DIR) + displayName: 'Install tests' + + - script: ls -alR $(INSTALL_DIR) + displayName: 'List install files' + + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false - - script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_filter=-MKLDNNGraphStructureTests.TestNoRedundantReordersBeforeDWConvolution:TestConvolution/MKLDNNGraphConvolutionTests.TestsConvolution/0:TestConvolutionDefaultPrimitivesPriority/MKLDNNGraphConvolutionTests.TestsConvolution/0 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml + - script: . 
$(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_filter=-MKLDNNGraphStructureTests.TestNoRedundantReordersBeforeDWConvolution:TestConvolution/MKLDNNGraphConvolutionTests.TestsConvolution/0:TestConvolutionDefaultPrimitivesPriority/MKLDNNGraphConvolutionTests.TestsConvolution/0 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml displayName: 'IE UT old' continueOnError: false - - script: $(BIN_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml displayName: 'IE UT' continueOnError: false - - script: $(BIN_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml displayName: 'CPU UT' continueOnError: false - - script: $(BIN_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml displayName: 'VPU UT' continueOnError: false - - script: $(BIN_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml displayName: 'ONNX Importer UT' continueOnError: false - - script: $(BIN_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml displayName: 'IE FuncTests' continueOnError: false - - script: $(BIN_DIR)/cpuFuncTests --gtest_filter=*smoke*:-smoke_LPT/ReduceMinTransformation.CompareWithRefImpl/f32_Shape* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/cpuFuncTests --gtest_filter=*smoke*:-smoke_LPT/ReduceMinTransformation.CompareWithRefImpl/f32_Shape* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml displayName: 'CPU FuncTests' continueOnError: false enabled: false @@ -139,7 +156,7 @@ jobs: - script: | export DATA_PATH=$(MODELS_PATH) export MODELS_PATH=$(MODELS_PATH) - $(BIN_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml + . 
$(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml displayName: 'IE CAPITests' continueOnError: false diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index 3d0936c5411eeb..81c1ba956017d8 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -33,7 +33,7 @@ jobs: MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_TEST_DIR: $(INSTALL_DIR)\tests - SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat + SETUPVARS: $(INSTALL_DIR)\setupvars.bat steps: - script: | @@ -122,11 +122,11 @@ jobs: - script: dir $(INSTALL_DIR) /s displayName: 'List install files' - - script: $(INSTALL_DIR)\deployment_tools\inference_engine\samples\cpp\build_samples_msvc.bat + - script: $(INSTALL_DIR)\samples\cpp\build_samples_msvc.bat workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build cpp samples' - - script: $(INSTALL_DIR)\deployment_tools\inference_engine\samples\c\build_samples_msvc.bat + - script: $(INSTALL_DIR)\samples\c\build_samples_msvc.bat workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' diff --git a/.ci/azure/windows_conditional_compilation.yml b/.ci/azure/windows_conditional_compilation.yml index 9024ede46f6018..80c89e8d20f28e 100644 --- a/.ci/azure/windows_conditional_compilation.yml +++ b/.ci/azure/windows_conditional_compilation.yml @@ -19,7 +19,7 @@ jobs: MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe INSTALL_DIR: $(WORK_DIR)\install_pkg - SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat + SETUPVARS: $(INSTALL_DIR)\setupvars.bat steps: - script: | diff --git a/.ci/openvino-onnx/Dockerfile b/.ci/openvino-onnx/Dockerfile index 314ab2c103746d..8e2365e4ebc506 100644 --- a/.ci/openvino-onnx/Dockerfile +++ b/.ci/openvino-onnx/Dockerfile @@ -65,7 +65,6 @@ RUN cmake .. \ -DENABLE_CLDNN=OFF \ -DENABLE_PROFILING_ITT=OFF \ -DENABLE_SAMPLES=OFF \ - -DENABLE_SPEECH_DEMO=OFF \ -DENABLE_PYTHON=ON \ -DPYTHON_EXECUTABLE=/usr/bin/python3 \ -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \ @@ -75,8 +74,8 @@ RUN cmake .. 
\ RUN make -j $(nproc) install # Run tests via tox -WORKDIR /openvino/ngraph/python -ENV ngraph_DIR=/openvino/dist/deployment_tools/ngraph -ENV LD_LIBRARY_PATH=/openvino/dist/deployment_tools/ngraph/lib +WORKDIR /openvino/runtime/bindings/python +ENV OpenVINO_DIR=/openvino/dist/runtime/cmake +ENV LD_LIBRARY_PATH=/openvino/dist/runtime/lib:/openvino/dist/runtime/3rdparty/tbb/lib ENV PYTHONPATH=/openvino/bin/intel64/${BUILD_TYPE}/lib/python_api/python3.8:${PYTHONPATH} CMD tox diff --git a/.ci/openvino-onnx/Jenkinsfile b/.ci/openvino-onnx/Jenkinsfile index 2849579dcdbf6b..5581c7c2ea8ad0 100644 --- a/.ci/openvino-onnx/Jenkinsfile +++ b/.ci/openvino-onnx/Jenkinsfile @@ -94,7 +94,7 @@ def prepare_repository(String workdir) { def updateModels() { sh """ - ./ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d ${HOME}/ONNX_CI/models_data -o -s ${ONNX_MODEL_ZOO_SHA} + ./runtime/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d ${HOME}/ONNX_CI/models_data -o -s ${ONNX_MODEL_ZOO_SHA} """ } diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7d450a951830a0..cc8124c21c7b54 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,7 +4,7 @@ version: 2 updates: # Enable version updates for nGraph Python API - package-ecosystem: pip - directory: "/ngraph/python" + directory: "/runtime/bindings/python" schedule: interval: weekly day: monday diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 40e02fd59b31bd..92b88967845b19 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -14,6 +14,7 @@ jobs: - name: Install dependencies run: | + sudo apt update sudo apt --assume-yes install libusb-1.0-0-dev graphviz texlive python3 -m pip install lxml # install doxygen diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 8f30c986361f45..3eecdc0aff5131 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -10,10 +10,13 @@ jobs: submodules: recursive - name: Install clang-format-9 - run: sudo apt --assume-yes install clang-format-9 + run: | + sudo apt update + sudo apt --assume-yes install clang-format-9 - name: Install dependencies run: | + sudo apt update sudo apt --assume-yes install libusb-1.0-0-dev python3 -m pip install --upgrade pip python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt @@ -52,7 +55,9 @@ jobs: submodules: recursive - name: Install ShellCheck - run: sudo apt --assume-yes install shellcheck + run: | + sudo apt update + sudo apt --assume-yes install shellcheck - name: Install dependencies run: python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt @@ -75,7 +80,9 @@ jobs: submodules: recursive - name: Install Clang dependency - run: sudo apt --assume-yes install libclang-9-dev + run: | + sudo apt update + sudo apt --assume-yes install libclang-9-dev - name: Install Python-based dependencies run: python3 -m pip install -r cmake/developer_package/ncc_naming_style/requirements_dev.txt diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index ebc0827adb0622..00fcec9d0258d6 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -41,6 +41,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_dev.txt # requrements for CMake + sudo apt update sudo apt --assume-yes install libusb-1.0-0-dev working-directory: model-optimizer diff --git a/.gitmodules b/.gitmodules index 095f39682640b6..8569ecbb9587ea 100644 --- a/.gitmodules +++ b/.gitmodules @@ 
-44,8 +44,8 @@ [submodule "thirdparty/protobuf"] path = thirdparty/protobuf/protobuf url = https://github.com/protocolbuffers/protobuf.git -[submodule "ngraph/python/pybind11"] - path = ngraph/python/pybind11 +[submodule "runtime/bindings/python/thirdparty/pybind11"] + path = runtime/bindings/python/thirdparty/pybind11 url = https://github.com/pybind/pybind11.git [submodule "thirdparty/ittapi/ittapi"] path = thirdparty/ittapi/ittapi diff --git a/CMakeLists.txt b/CMakeLists.txt index 61a96ae9f4c816..7ead5a53212142 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,13 +26,15 @@ endif() # resolving dependencies for the project message (STATUS "PROJECT ............................... " ${PROJECT_NAME}) message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR}) -message (STATUS "OpenVINO_SOURCE_DIR .... .......... " ${OpenVINO_SOURCE_DIR}) +message (STATUS "OpenVINO_SOURCE_DIR ................... " ${OpenVINO_SOURCE_DIR}) message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR}) message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID}) message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE}) # remove file with exported developer targets to force its regeneration +file(REMOVE "${CMAKE_BINARY_DIR}/ngraph/ngraphTargets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") +file(REMOVE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") foreach(component IN LISTS openvino_export_components) file(REMOVE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake") unset(${component} CACHE) @@ -83,6 +85,7 @@ include(cmake/test_model_zoo.cmake) add_subdirectory(thirdparty) add_subdirectory(openvino) add_subdirectory(ngraph) +add_subdirectory(runtime) add_subdirectory(inference-engine) # for Template plugin diff --git a/CODEOWNERS b/CODEOWNERS index 165bc7455631f2..bc7beb0841ef38 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -44,7 +44,6 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins /inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers /inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers /inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers -/inference-engine/tools/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers # IE GNA: @@ -72,6 +71,7 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # Documentation /docs/ @openvinotoolkit/openvino-docs-maintainers +/docs/template_plugin/ @openvinotoolkit/openvino-ie-template-maintainers *.md @openvinotoolkit/openvino-docs-maintainers # Control 3d party dependencies diff --git a/README.md b/README.md index c445e5209b11cc..7c4b3cdd3a707c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # OpenVINO™ Toolkit -[![Stable release](https://img.shields.io/badge/version-2021.4-green.svg)](https://github.com/openvinotoolkit/openvino/releases/tag/2021.4) +[![Stable release](https://img.shields.io/badge/version-2021.4.1-green.svg)](https://github.com/openvinotoolkit/openvino/releases/tag/2021.4.1) [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE) ![GitHub branch checks state](https://img.shields.io/github/checks-status/openvinotoolkit/openvino/master?label=GitHub%20checks) 
![Azure DevOps builds (branch)](https://img.shields.io/azure-devops/build/openvinoci/b2bab62f-ab2f-4871-a538-86ea1be7d20f/13?label=Public%20CI) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 82f98b4c515ff3..b4946d85d11d85 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -89,7 +89,7 @@ if(THREADING STREQUAL "OMP") ie_cpack_add_component(omp REQUIRED) file(GLOB_RECURSE source_list "${OMP}/*${CMAKE_SHARED_LIBRARY_SUFFIX}*") install(FILES ${source_list} - DESTINATION "deployment_tools/inference_engine/external/omp/lib" + DESTINATION "runtime/3rdparty/omp/lib" COMPONENT omp) endif() @@ -298,44 +298,3 @@ if(ENABLE_GNA) update_deps_cache(GNA "${GNA}" "Path to GNA root folder") debug_message(STATUS "gna=" ${GNA}) endif() - -if(ENABLE_SPEECH_DEMO) - reset_deps_cache(SPEECH_LIBS_AND_DEMOS) - if(DEFINED ENV{THIRDPARTY_SERVER_PATH}) - set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}") - elseif(DEFINED THIRDPARTY_SERVER_PATH) - set(IE_PATH_TO_DEPS "${THIRDPARTY_SERVER_PATH}") - else() - message(WARNING "Unable to locate Speech Demo") - endif() - if(DEFINED IE_PATH_TO_DEPS) - if(WIN32 AND X86_64) - RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.780_windows.zip" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" - SHA256 "957bd274a1f6dc1d83a46879c7ef3b3b06f17d11af85cc45c18919051d145abd") - debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) - elseif(LINUX AND X86_64) - if(LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") - RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.780_centos.tgz" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" - SHA256 "6d8d1111d0e662fe71d71cd3debad2995f6fb6fe5df3b92196dae06ff7abdf44") - debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) - else() - RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.780_linux.tgz" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" - SHA256 "0ec6f1e47c00d781dc918af5d3055ab474ff47b9978dd6fe2add73e3339b0763") - debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) - endif() - else() - message(FATAL_ERROR "Speech Demo is not available on current platform") - endif() - unset(IE_PATH_TO_DEPS) - endif() - update_deps_cache(SPEECH_LIBS_AND_DEMOS "${SPEECH_LIBS_AND_DEMOS}" "Path to SPEECH_LIBS_AND_DEMOS root folder") -endif() diff --git a/cmake/developer_package/packaging.cmake b/cmake/developer_package/packaging.cmake index 4095a16157c068..7708de5c77b921 100644 --- a/cmake/developer_package/packaging.cmake +++ b/cmake/developer_package/packaging.cmake @@ -10,16 +10,15 @@ include(CPackComponent) # # Set library directory for cpack # -set(IE_CPACK_IE_DIR deployment_tools/inference_engine) function(ie_cpack_set_library_dir) if(WIN32) - set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) - set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH_FOLDER}/$ PARENT_SCOPE) - set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) + set(IE_CPACK_LIBRARY_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) + set(IE_CPACK_RUNTIME_PATH runtime/bin/${ARCH_FOLDER}/$ PARENT_SCOPE) + set(IE_CPACK_ARCHIVE_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) else() - set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE) - set(IE_CPACK_RUNTIME_PATH 
${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE) - set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE) + set(IE_CPACK_LIBRARY_PATH runtime/lib/${ARCH_FOLDER} PARENT_SCOPE) + set(IE_CPACK_RUNTIME_PATH runtime/lib/${ARCH_FOLDER} PARENT_SCOPE) + set(IE_CPACK_ARCHIVE_PATH runtime/lib/${ARCH_FOLDER} PARENT_SCOPE) endif() endfunction() diff --git a/cmake/developer_package/plugins/create_plugin_file.cmake b/cmake/developer_package/plugins/create_plugin_file.cmake index 88f33904bee30a..cb28967d7960b2 100644 --- a/cmake/developer_package/plugins/create_plugin_file.cmake +++ b/cmake/developer_package/plugins/create_plugin_file.cmake @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # +cmake_policy(SET CMP0007 NEW) + set(newContent " ") if(IE_PLUGIN_PROPERTIES) @@ -9,10 +11,11 @@ if(IE_PLUGIN_PROPERTIES) ") foreach(props IN LISTS IE_PLUGIN_PROPERTIES) - string(REPLACE "," ";" props "${props}") + string(REPLACE ":" ";" props "${props}") list(GET props 0 key) list(GET props 1 value) + set(newContent "${newContent} ") endforeach() diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index cec023f3062513..3f83954cfa7f8c 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -20,19 +20,18 @@ endif() # # ie_add_plugin(NAME # DEVICE_NAME -# SOURCES -# OBJECT_LIBRARIES -# VERSION_DEFINES_FOR -# SKIP_INSTALL +# [PSEUDO] +# [DEFAULT_CONFIG ] +# [SOURCES ] +# [OBJECT_LIBRARIES ] +# [VERSION_DEFINES_FOR ] +# [SKIP_INSTALL] # ) # function(ie_add_plugin) - set(options - SKIP_INSTALL - ADD_CLANG_FORMAT - ) + set(options SKIP_INSTALL ADD_CLANG_FORMAT PSEUDO_PLUGIN) set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR) - set(multiValueArgs SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS) + set(multiValueArgs DEFAULT_CONFIG SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS) cmake_parse_arguments(IE_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) if(NOT IE_PLUGIN_NAME) @@ -45,41 +44,73 @@ function(ie_add_plugin) # create and configure target - if(IE_PLUGIN_VERSION_DEFINES_FOR) - addVersionDefines(${IE_PLUGIN_VERSION_DEFINES_FOR} CI_BUILD_NUMBER) - endif() + if(NOT IE_PLUGIN_PSEUDO_PLUGIN) + if(IE_PLUGIN_VERSION_DEFINES_FOR) + addVersionDefines(${IE_PLUGIN_VERSION_DEFINES_FOR} CI_BUILD_NUMBER) + endif() - set(input_files ${IE_PLUGIN_SOURCES}) - foreach(obj_lib IN LISTS IE_PLUGIN_OBJECT_LIBRARIES) - list(APPEND input_files $) - add_cpplint_target(${obj_lib}_cpplint FOR_TARGETS ${obj_lib}) - endforeach() + set(input_files ${IE_PLUGIN_SOURCES}) + foreach(obj_lib IN LISTS IE_PLUGIN_OBJECT_LIBRARIES) + list(APPEND input_files $) + add_cpplint_target(${obj_lib}_cpplint FOR_TARGETS ${obj_lib}) + endforeach() - add_library(${IE_PLUGIN_NAME} MODULE ${input_files}) - target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN) + add_library(${IE_PLUGIN_NAME} MODULE ${input_files}) + target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN) - ie_add_vs_version_file(NAME ${IE_PLUGIN_NAME} - FILEDESCRIPTION "Inference Engine ${IE_PLUGIN_DEVICE_NAME} device plugin library") + ie_add_vs_version_file(NAME ${IE_PLUGIN_NAME} + FILEDESCRIPTION "Inference Engine ${IE_PLUGIN_DEVICE_NAME} device plugin library") - if(TARGET IE::inference_engine_plugin_api) - target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine_plugin_api) - else() - target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_plugin_api) - endif() + 
if(TARGET IE::inference_engine_plugin_api) + target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine_plugin_api) + else() + target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_plugin_api) + endif() - if(WIN32) - set_target_properties(${IE_PLUGIN_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_PLUGIN_NAME}) - endif() + if(WIN32) + set_target_properties(${IE_PLUGIN_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_PLUGIN_NAME}) + endif() - set(custom_filter "") - foreach(filter IN LISTS IE_PLUGIN_CPPLINT_FILTERS) - string(CONCAT custom_filter "${custom_filter}" "," "${filter}") - endforeach() + set(custom_filter "") + foreach(filter IN LISTS IE_PLUGIN_CPPLINT_FILTERS) + string(CONCAT custom_filter "${custom_filter}" "," "${filter}") + endforeach() - if (IE_PLUGIN_ADD_CLANG_FORMAT) - add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME}) - else() - add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter}) + if (IE_PLUGIN_ADD_CLANG_FORMAT) + add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME}) + else() + add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter}) + endif() + + add_dependencies(ie_plugins ${IE_PLUGIN_NAME}) + if(TARGET inference_engine_preproc) + add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc) + endif() + + # fake dependencies to build in the following order: + # IE -> IE readers -> IE inference plugins -> IE-based apps + if(TARGET inference_engine_ir_reader) + add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_reader) + endif() + if(TARGET inference_engine_ir_v7_reader) + add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader) + endif() + if(TARGET onnx_ngraph_frontend) + add_dependencies(${IE_PLUGIN_NAME} onnx_ngraph_frontend) + endif() + if(TARGET paddlepaddle_ngraph_frontend) + add_dependencies(${IE_PLUGIN_NAME} paddlepaddle_ngraph_frontend) + endif() + + # install rules + if(NOT IE_PLUGIN_SKIP_INSTALL) + string(TOLOWER "${IE_PLUGIN_DEVICE_NAME}" install_component) + ie_cpack_add_component(${install_component} REQUIRED DEPENDS core) + + install(TARGETS ${IE_PLUGIN_NAME} + LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH} + COMPONENT ${install_component}) + endif() endif() # check that plugin with such name is not registered @@ -98,33 +129,7 @@ function(ie_add_plugin) list(APPEND PLUGIN_FILES "${IE_PLUGIN_DEVICE_NAME}:${IE_PLUGIN_NAME}") set(PLUGIN_FILES "${PLUGIN_FILES}" CACHE INTERNAL "" FORCE) - - add_dependencies(ie_plugins ${IE_PLUGIN_NAME}) - if(TARGET inference_engine_preproc) - add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc) - endif() - - # fake dependencies to build in the following order: - # IE -> IE readers -> IE inference plugins -> IE-based apps - if(TARGET inference_engine_ir_reader) - add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_reader) - endif() - if(TARGET inference_engine_ir_v7_reader) - add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader) - endif() - if(TARGET onnx_ngraph_frontend) - add_dependencies(${IE_PLUGIN_NAME} onnx_ngraph_frontend) - endif() - - # install rules - - if(NOT IE_PLUGIN_SKIP_INSTALL) - string(TOLOWER "${IE_PLUGIN_DEVICE_NAME}" install_component) - ie_cpack_add_component(${install_component} REQUIRED DEPENDS core) - - install(TARGETS ${IE_PLUGIN_NAME} - LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT ${install_component}) - endif() + set(${IE_PLUGIN_DEVICE_NAME}_CONFIG "${IE_PLUGIN_DEFAULT_CONFIG}" CACHE INTERNAL "" 
FORCE) endfunction() # @@ -168,7 +173,7 @@ macro(ie_register_plugins) list(GET name 1 name) # create plugin file - set(config_file_name "${CMAKE_BINARY_DIR}/plugins/${name}.xml") + set(config_file_name "${CMAKE_BINARY_DIR}/plugins/${device_name}.xml") ie_plugin_get_file_name(${name} library_name) add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD @@ -176,9 +181,10 @@ macro(ie_register_plugins) "${CMAKE_COMMAND}" -D "IE_CONFIG_OUTPUT_FILE=${config_file_name}" -D "IE_DEVICE_NAME=${device_name}" + -D "IE_PLUGIN_PROPERTIES=${${device_name}_CONFIG}" -D "IE_PLUGIN_LIBRARY_NAME=${library_name}" -P "${IEDevScripts_DIR}/plugins/create_plugin_file.cmake" - COMMENT "Register ${name} plugin" + COMMENT "Register ${device_name} device as ${library_name}" VERBATIM) list(APPEND plugin_files_local "${config_file_name}") diff --git a/cmake/features.cmake b/cmake/features.cmake index cf9fbebbe9410a..450b0d255ccc05 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -98,8 +98,6 @@ ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF) -ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86_64" OFF) - ie_option (ENABLE_OPENCV "enables OpenCV" ON) ie_option (ENABLE_V7_SERIALIZE "enables serialization to IR v7" OFF) diff --git a/cmake/templates/InferenceEngineConfig.cmake.in b/cmake/templates/InferenceEngineConfig.cmake.in index 43408483f9af6e..c6a69a907f2874 100644 --- a/cmake/templates/InferenceEngineConfig.cmake.in +++ b/cmake/templates/InferenceEngineConfig.cmake.in @@ -25,92 +25,32 @@ @PACKAGE_INIT@ -# -# Common functions -# - if(NOT DEFINED CMAKE_FIND_PACKAGE_NAME) set(CMAKE_FIND_PACKAGE_NAME InferenceEngine) - set(_need_package_name_reset ON) + set(_ie_need_package_name_reset ON) endif() -# we have to use our own version of find_dependency because of support cmake 3.7 -macro(_ie_find_dependency dep) - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - - get_property(cmake_fd_alreadyTransitive GLOBAL PROPERTY - _CMAKE_${dep}_TRANSITIVE_DEPENDENCY) - - find_package(${dep} ${ARGN} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg}) - - if(NOT DEFINED cmake_fd_alreadyTransitive OR cmake_fd_alreadyTransitive) - set_property(GLOBAL PROPERTY _CMAKE_${dep}_TRANSITIVE_DEPENDENCY TRUE) - endif() - - if(NOT ${dep}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) -endmacro() - -function(_ie_target_no_deprecation_error) - if(NOT MSVC) - if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(flags "-diag-warning=1786") - else() - set(flags "-Wno-error=deprecated-declarations") - endif() - if(CMAKE_CROSSCOMPILING) - set_target_properties(${ARGV} PROPERTIES - INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined") - endif() - - set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags}) - endif() -endfunction() +# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by sub-package one +set(_ie_package_prefix_dir "${PACKAGE_PREFIX_DIR}") -# -# Inference Engine config -# 
+include(CMakeFindDependencyMacro) -# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one -set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}") +find_dependency(OpenVINO + PATHS "${CMAKE_CURRENT_LIST_DIR}" + NO_CMAKE_FIND_ROOT_PATH + NO_DEFAULT_PATH) -set(THREADING "@THREADING@") -if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND) - set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@") - _ie_find_dependency(TBB - COMPONENTS tbb tbbmalloc - CONFIG - PATHS ${TBBROOT}/cmake - ${_tbb_dir} - NO_CMAKE_FIND_ROOT_PATH - NO_DEFAULT_PATH) +# create targets with old names for compatibility +if(TARGET openvino::runtime AND NOT TARGET IE::inference_engine) + add_library(IE::inference_engine INTERFACE IMPORTED) + set_target_properties(IE::inference_engine PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::runtime) endif() -set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@") -_ie_find_dependency(ngraph - CONFIG - PATHS ${_ngraph_dir} - NO_CMAKE_FIND_ROOT_PATH - NO_DEFAULT_PATH) - -if(NOT TARGET inference_engine) - set(_ie_as_external_package ON) - include("${CMAKE_CURRENT_LIST_DIR}/InferenceEngineTargets.cmake") +if(TARGET openvino::runtime::c AND NOT TARGET IE::inference_engine_c_api) + add_library(IE::inference_engine_c_api INTERFACE IMPORTED) + set_target_properties(IE::inference_engine_c_api PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::runtime::c) endif() # mark components as available @@ -126,29 +66,21 @@ unset(InferenceEngine_LIBRARIES) foreach(comp IN LISTS ${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS) # check if the component is available if(${CMAKE_FIND_PACKAGE_NAME}_${comp}_FOUND) - set(pcomp ${comp}) - if(_ie_as_external_package) - set(pcomp IE::${comp}) - endif() + set(pcomp IE::${comp}) list(APPEND InferenceEngine_LIBRARIES ${pcomp}) endif() endforeach() -if(_ie_as_external_package) - _ie_target_no_deprecation_error(${InferenceEngine_LIBRARIES}) -endif() -unset(_ie_as_external_package) - # restore PACKAGE_PREFIX_DIR -set(PACKAGE_PREFIX_DIR ${IE_PACKAGE_PREFIX_DIR}) -unset(IE_PACKAGE_PREFIX_DIR) +set(PACKAGE_PREFIX_DIR ${_ie_package_prefix_dir}) +unset(_ie_package_prefix_dir) set_and_check(InferenceEngine_INCLUDE_DIRS "@PACKAGE_IE_INCLUDE_DIR@") check_required_components(${CMAKE_FIND_PACKAGE_NAME}) -if(_need_package_name_reset) +if(_ie_need_package_name_reset) unset(CMAKE_FIND_PACKAGE_NAME) - unset(_need_package_name_reset) + unset(_ie_need_package_name_reset) endif() diff --git a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in index 72af5ca89cadac..0f1f0d5e374f01 100644 --- a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in +++ b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in @@ -44,10 +44,30 @@ find_dependency(InferenceEngine NO_CMAKE_FIND_ROOT_PATH NO_DEFAULT_PATH) +find_dependency(ngraph + PATHS "${CMAKE_CURRENT_LIST_DIR}/ngraph" + NO_CMAKE_FIND_ROOT_PATH + NO_DEFAULT_PATH) + +# create targets with old names for compatibility +if(TARGET openvino::core AND NOT TARGET IE::core) + add_library(IE::core INTERFACE IMPORTED) + set_target_properties(IE::core PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::core) +endif() + +if(TARGET openvino::runtime AND NOT TARGET IE::runtime) + add_library(IE::runtime INTERFACE IMPORTED) + set_target_properties(IE::runtime PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::runtime) +endif() + # WA for cmake: it exports ngraph as IE::ngraph in the IE export list # while we already have ngraph export in its own 
export list as ngraph::ngraph -set_property(TARGET ngraph::ngraph PROPERTY IMPORTED_GLOBAL TRUE) -add_library(IE::ngraph ALIAS ngraph::ngraph) +if(TARGET ngraph::ngraph AND NOT TARGET IE::ngraph) + add_library(IE::ngraph INTERFACE IMPORTED) + set_target_properties(IE::ngraph PROPERTIES INTERFACE_LINK_LIBRARIES ngraph::ngraph) +endif() foreach(component @openvino_export_components@) include("${CMAKE_CURRENT_LIST_DIR}/${component}_dev_targets.cmake") diff --git a/cmake/templates/OpenVINOConfig-version.cmake.in b/cmake/templates/OpenVINOConfig-version.cmake.in new file mode 100644 index 00000000000000..4e42995a83013f --- /dev/null +++ b/cmake/templates/OpenVINOConfig-version.cmake.in @@ -0,0 +1,21 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(PACKAGE_VERSION_MAJOR @IE_VERSION_MAJOR@) +set(PACKAGE_VERSION_MINOR @IE_VERSION_MINOR@) +set(PACKAGE_VERSION_PATCH @IE_VERSION_PATCH@) +set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}") + +set(PACKAGE_VERSION_EXACT False) +set(PACKAGE_VERSION_COMPATIBLE False) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT True) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() + +if(PACKAGE_FIND_VERSION_MAJOR EQUAL PACKAGE_VERSION_MAJOR AND + PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() diff --git a/cmake/templates/OpenVINOConfig.cmake.in b/cmake/templates/OpenVINOConfig.cmake.in new file mode 100644 index 00000000000000..14fc57b36c258d --- /dev/null +++ b/cmake/templates/OpenVINOConfig.cmake.in @@ -0,0 +1,203 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# FindOpenVINO +# ------ +# +# Provides OpenVINO runtime for model creation and inference, frontend libraries +# to convert models from framework specific formats. 
+# +# The following components are supported: +# +# * `Runtime`: OpenVINO C++ and C Core & Inference Runtime, frontend manager +# * `ONNX`: OpenVINO ONNX frontend +# * `PaddlePaddle`: OpenVINO PaddlePaddle frontend +# +# If no components are specified, `Runtime` component is provided: +# +# find_package(OpenVINO REQUIRED) # only Runtime component +# +# If specific components are required: +# +# find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX) +# +# Imported Targets: +# ------ +# +# Runtime targets: +# +# `openvino::runtime` +# The OpenVINO C++ Core & Inference Runtime +# +# `openvino::runtime::c` +# The OpenVINO C Inference Runtime +# +# `openvino::core` +# The OpenVINO C++ Core Runtime +# +# Frontend specific targets: +# +# `openvino::frontend::manager` +# OpenVINO frontend manager +# +# `openvino::frontend::onnx` +# ONNX FrontEnd target (optional) +# +# `openvino::frontend::paddlepaddle` +# PaddlePaddle FrontEnd target (optional) +# +# Result variables: +# ------ +# +# The module sets the following variables in your project: +# +# `OpenVINO_FOUND` +# System has OpenVINO Runtime installed +# +# `OpenVINO_Runtime_FOUND` +# OpenVINO C++ Core & Inference Runtime is available +# +# `OpenVINO_Frontend_ONNX_FOUND` +# OpenVINO ONNX frontend is available +# +# `OpenVINO_Frontend_PaddlePaddle_FOUND` +# OpenVINO PaddlePaddle frontend is available +# +# OpenVINO version variables: +# +# `OpenVINO_VERSION_MAJOR` +# Major version component +# +# `OpenVINO_VERSION_MINOR` +# minor version component +# +# `OpenVINO_VERSION_PATCH` +# Patch version component +# + +@PACKAGE_INIT@ + +# +# Common functions +# + +if(NOT DEFINED CMAKE_FIND_PACKAGE_NAME) + set(CMAKE_FIND_PACKAGE_NAME OpenVINO) + set(_need_package_name_reset ON) +endif() + +# we have to use our own version of find_dependency because of support cmake 3.7 +macro(_ov_find_dependency dep) + set(cmake_fd_quiet_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + set(cmake_fd_quiet_arg QUIET) + endif() + set(cmake_fd_required_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + set(cmake_fd_required_arg REQUIRED) + endif() + + get_property(cmake_fd_alreadyTransitive GLOBAL PROPERTY + _CMAKE_${dep}_TRANSITIVE_DEPENDENCY) + + find_package(${dep} ${ARGN} + ${cmake_fd_quiet_arg} + ${cmake_fd_required_arg}) + + if(NOT DEFINED cmake_fd_alreadyTransitive OR cmake_fd_alreadyTransitive) + set_property(GLOBAL PROPERTY _CMAKE_${dep}_TRANSITIVE_DEPENDENCY TRUE) + endif() + + if(NOT ${dep}_FOUND) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") + set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) + return() + endif() + + set(cmake_fd_required_arg) + set(cmake_fd_quiet_arg) +endmacro() + +function(_ov_target_no_deprecation_error) + if(NOT MSVC) + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set(flags "-diag-warning=1786") + else() + set(flags "-Wno-error=deprecated-declarations") + endif() + if(CMAKE_CROSSCOMPILING) + set_target_properties(${ARGV} PROPERTIES + INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined") + endif() + + set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags}) + endif() +endfunction() + +# +# OpenVINO config +# + +# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by sub-package one +set(_ov_package_prefix_dir "${PACKAGE_PREFIX_DIR}") + +set(THREADING "@THREADING@") +if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND) + set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@") + 
_ov_find_dependency(TBB + COMPONENTS tbb tbbmalloc + CONFIG + PATHS ${TBBROOT}/cmake + ${_tbb_dir} + NO_CMAKE_FIND_ROOT_PATH + NO_DEFAULT_PATH) +endif() + +if(NOT TARGET inference_engine) + set(_ov_as_external_package ON) + include("${CMAKE_CURRENT_LIST_DIR}/OpenVINOTargets.cmake") +endif() + +# +# Components +# + +set(${CMAKE_FIND_PACKAGE_NAME}_Runtime_FOUND ON) + +set(${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@) +set(${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@) + +set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_ONNX_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND}) +set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_PaddlePaddle_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND}) + +# if no components specified, only Runtime is provided +if(NOT ${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS) + set(${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS Runtime) +endif() + +# +# Apply common functions +# + +foreach(target openvino::runtime openvino::runtime::c openvino::core + openvino::frontend::manager openvino::frontend::onnx + openvino::frontend::paddlepaddle) + if(TARGET ${target} AND _ov_as_external_package) + _ov_target_no_deprecation_error(${target}) + endif() +endforeach() +unset(_ov_as_external_package) + +# restore PACKAGE_PREFIX_DIR +set(PACKAGE_PREFIX_DIR ${_ov_package_prefix_dir}) +unset(_ov_package_prefix_dir) + +check_required_components(${CMAKE_FIND_PACKAGE_NAME}) + +if(_need_package_name_reset) + unset(CMAKE_FIND_PACKAGE_NAME) + unset(_need_package_name_reset) +endif() + +unset(${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND) +unset(${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND) diff --git a/cmake/templates/ngraphConfig.cmake.in b/cmake/templates/ngraphConfig.cmake.in index 1c17cbeb4cc49a..a94e6b50a58933 100644 --- a/cmake/templates/ngraphConfig.cmake.in +++ b/cmake/templates/ngraphConfig.cmake.in @@ -37,28 +37,56 @@ @PACKAGE_INIT@ -if(NOT TARGET ngraph) - include("${CMAKE_CURRENT_LIST_DIR}/ngraphTargets.cmake") +include(CMakeFindDependencyMacro) + +find_dependency(OpenVINO + PATHS "${CMAKE_CURRENT_LIST_DIR}" + "${CMAKE_CURRENT_LIST_DIR}/ngraph" + NO_CMAKE_FIND_ROOT_PATH + NO_DEFAULT_PATH) + +# create targets with old names for compatibility +if(TARGET openvino::core AND NOT TARGET ngraph::ngraph) + add_library(ngraph::ngraph INTERFACE IMPORTED) + set_target_properties(ngraph::ngraph PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::core) +endif() + +if(TARGET openvino::frontend::manager AND NOT TARGET ngraph::frontend_manager) + add_library(ngraph::frontend_manager INTERFACE IMPORTED) + set_target_properties(ngraph::frontend_manager PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::frontend::manager) +endif() + +if(TARGET openvino::frontend::onnx AND NOT TARGET ngraph::onnx_ngraph_frontend) + add_library(ngraph::onnx_ngraph_frontend INTERFACE IMPORTED) + set_target_properties(ngraph::onnx_ngraph_frontend PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::frontend::onnx) +endif() + +if(TARGET openvino::frontend::paddlepaddle AND NOT TARGET ngraph::paddlepaddle_ngraph_frontend) + add_library(ngraph::paddlepaddle_ngraph_frontend INTERFACE IMPORTED) + set_target_properties(ngraph::paddlepaddle_ngraph_frontend PROPERTIES + INTERFACE_LINK_LIBRARIES openvino::frontend::paddlepaddle) endif() set(ngraph_ngraph_FOUND ON) set(NGRAPH_LIBRARIES ngraph::ngraph) -set(ngraph_onnx_ngraph_frontend_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@) +set(ngraph_onnx_ngraph_frontend_FOUND ${OpenVINO_Frontend_ONNX_FOUND}) +set(ngraph_onnx_importer_FOUND ${OpenVINO_Frontend_ONNX_FOUND}) -# 
ngraph::onnx_importer target and variables are deprecated -set(ngraph_onnx_importer_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@) if(ngraph_onnx_importer_FOUND) set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_ngraph_frontend) + # ngraph::onnx_importer target and variables are deprecated + # but need to create a dummy target for BW compatibility if(NOT TARGET ngraph::onnx_importer) add_library(ngraph::onnx_importer INTERFACE IMPORTED) set_target_properties(ngraph::onnx_importer PROPERTIES - INTERFACE_LINK_LIBRARIES ngraph::onnx_ngraph_frontend - ) + INTERFACE_LINK_LIBRARIES ngraph::onnx_ngraph_frontend) endif() endif() -set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@) -set(ir_frontend_FOUND @IR_FRONTEND_ENABLE@) +set(ngraph_paddlepaddle_frontend_FOUND ${OpenVINO_Frontend_PaddlePaddle_FOUND}) check_required_components(ngraph) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 4d3135903de149..6eac6cc9ecbffa 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -9,20 +9,12 @@ if(NOT ENABLE_DOCKER) add_subdirectory(snippets) - # Detect nGraph - find_package(ngraph QUIET - PATHS "${CMAKE_BINARY_DIR}/ngraph" - NO_DEFAULT_PATH) - if(NOT ngraph_FOUND) - set(ngraph_DIR ${CMAKE_BINARY_DIR}/ngraph) - endif() - - # Detect InferenceEngine - find_package(InferenceEngine QUIET + # Detect OpenVINO + find_package(OpenVINO QUIET PATHS "${CMAKE_BINARY_DIR}" NO_DEFAULT_PATH) - if(NOT InferenceEngine_FOUND) - set(InferenceEngine_DIR ${CMAKE_BINARY_DIR}) + if(NOT OpenVINO_FOUND) + set(OpenVINO_DIR ${CMAKE_BINARY_DIR}) endif() if(NGRAPH_ONNX_FRONTEND_ENABLE) @@ -72,7 +64,7 @@ function(build_docs) set(C_API "${IE_SOURCE_DIR}/ie_bridges/c/include") set(PLUGIN_API_DIR "${DOCS_BUILD_DIR}/IE_PLUGIN_DG") set(NGRAPH_DIR "${OpenVINO_SOURCE_DIR}/ngraph") - set(NGRAPH_PY_DIR "${NGRAPH_DIR}/python/src/ngraph/") + set(NGRAPH_PY_DIR "${OpenVINO_SOURCE_DIR}/runtime/bindings/python/src/compatibility/ngraph/") set(NGRAPH_CPP_DIR "${NGRAPH_DIR}/core/include/" "${NGRAPH_DIR}/frontend/onnx_import/include") # Preprocessing scripts diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md index cda4ed1c968f47..4bea76f5902baa 100644 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ b/docs/HOWTO/Custom_Layers_Guide.md @@ -313,7 +313,7 @@ operation for the CPU plugin. The code of the library is described in the [Exte To build the extension, run the following:
```bash mkdir build && cd build -source /opt/intel/openvino_2021/bin/setupvars.sh +source /opt/intel/openvino_2022/setupvars.sh cmake .. -DCMAKE_BUILD_TYPE=Release make --jobs=$(nproc) ``` diff --git a/docs/IE_DG/Cross_Check_Tool.md b/docs/IE_DG/Cross_Check_Tool.md index 495afa790fcccc..d53d3dddfe47de 100644 --- a/docs/IE_DG/Cross_Check_Tool.md +++ b/docs/IE_DG/Cross_Check_Tool.md @@ -8,11 +8,11 @@ The Cross Check Tool can compare metrics per layer or all over the model. On Linux* OS, before running the Cross Check Tool binary, make sure your application can find the Deep Learning Inference Engine libraries. -Navigate to the `/deployment_tools/inference_engine/bin` folder and run the `setvars.sh` script to +Navigate to the `` folder and run the `setupvars.sh` script to set all necessary environment variables: ```sh -source setvars.sh +source setupvars.sh ``` ## Running the Cross Check Tool diff --git a/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md b/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md index 0f07f5503811f5..e2960c5dd87394 100644 --- a/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md +++ b/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md @@ -1,7 +1,5 @@ # Inference Engine Developer Guide {#openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide} -> **NOTE:** [Intel® System Studio](https://software.intel.com/content/www/us/en/develop/tools/oneapi/commercial-base-iot.html) (click "Intel® System Studio Users" tab) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019). - This Guide provides an overview of the Inference Engine describing the typical workflow for performing inference of a pre-trained and optimized deep learning model and a set of sample applications. > **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in runtime using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel). @@ -81,9 +79,9 @@ Make sure those libraries are in your computer's path or in the place you pointe * Windows: `PATH` * macOS: `DYLD_LIBRARY_PATH` -On Linux and macOS, use the script `bin/setupvars.sh` to set the environment variables. +On Linux and macOS, use the script `setupvars.sh` to set the environment variables. -On Windows, run the `bin\setupvars.bat` batch file to set the environment variables. +On Windows, run the `setupvars.bat` batch file to set the environment variables. To learn more about supported devices and corresponding plugins, see the [Supported Devices](supported_plugins/Supported_Devices.md) chapter. @@ -111,10 +109,8 @@ The common workflow contains the following steps: 8. **Get the output** - After inference is completed, get the output memory or read the memory you provided earlier. Do this with the `InferenceEngine::IInferRequest::GetBlob()` method. 
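The workflow steps kept as context in the hunk above (ending with `InferenceEngine::IInferRequest::GetBlob()`) correspond to a short piece of application code. The sketch below is an illustration only and is not part of this patch; it assumes the classic `InferenceEngine` C++ API that the guide documents, and the model path, device name, and use of the first input/output are placeholder assumptions.

```cpp
#include <inference_engine.hpp>
#include <string>

int main() {
    // Create the Inference Engine Core object.
    InferenceEngine::Core core;

    // Read a model in IR format (placeholder file names).
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml", "model.bin");

    // Query input/output names; per-layer precision/layout configuration is omitted here.
    const std::string input_name = network.getInputsInfo().begin()->first;
    const std::string output_name = network.getOutputsInfo().begin()->first;

    // Load the network to a device (placeholder device name) and create an inference request.
    InferenceEngine::ExecutableNetwork executable_network = core.LoadNetwork(network, "CPU");
    InferenceEngine::InferRequest request = executable_network.CreateInferRequest();

    // Fill the input blob with data (population omitted) and run inference synchronously.
    InferenceEngine::Blob::Ptr input_blob = request.GetBlob(input_name);
    // ... populate input_blob ...
    request.Infer();

    // "Get the output": read the output blob after inference completes.
    InferenceEngine::Blob::Ptr output_blob = request.GetBlob(output_name);
    return output_blob != nullptr ? 0 : 1;
}
```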
## Video: Inference Engine Concept -[![](https://img.youtube.com/vi/e6R13V8nbak/0.jpg)](https://www.youtube.com/watch?v=e6R13V8nbak) -\htmlonly + -\endhtmlonly ## Further Reading diff --git a/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md b/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md index 8ca911f7d0cda9..ed4d65595320a5 100644 --- a/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md +++ b/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md @@ -1,6 +1,6 @@ # Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps} -Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box. +The Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations that OpenVINO™ does not support out-of-the-box. ## Operation Class diff --git a/docs/IE_DG/Extensibility_DG/Building.md b/docs/IE_DG/Extensibility_DG/Building.md index d1f62cb53a822d..be93c5a06d3bc9 100644 --- a/docs/IE_DG/Extensibility_DG/Building.md +++ b/docs/IE_DG/Extensibility_DG/Building.md @@ -14,6 +14,6 @@ To build an extension library, run the commands below: $ cd template_extension $ mkdir build $ cd build -$ cmake -DInferenceEngine_DIR=[IE_DIR] -Dngraph_DIR=[NGRAPH_DIR] ../ +$ cmake -DOpenVINO_DIR=[OpenVINO_DIR] ../ $ cmake --build . ``` diff --git a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md index a9a9841cac457f..eb7183f0dc246b 100644 --- a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md +++ b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md @@ -50,8 +50,8 @@ The example below demonstrates how to unregister an operator from the destructor ## Requirements for Building with CMake -A program that uses the `register_operator` functionality requires `ngraph::ngraph` and `ngraph::onnx_ngraph_frontend` libraries in addition to the Inference Engine. -The `onnx_ngraph_frontend` is a component of the `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)` can find both. +A program that uses the `register_operator` functionality requires `openvino::core` and `openvino::frontend::onnx` libraries in addition to the OpenVINO Inference Runtime. +The `onnx_ngraph_frontend` is a component of the `OpenVINO` package , so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both. Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file. See CMakeLists.txt below for reference: diff --git a/docs/IE_DG/Extensibility_DG/Extension.md b/docs/IE_DG/Extensibility_DG/Extension.md index 178d0099df68ee..e941cb9c13c1a8 100644 --- a/docs/IE_DG/Extensibility_DG/Extension.md +++ b/docs/IE_DG/Extensibility_DG/Extension.md @@ -25,5 +25,6 @@ Also, an `Extension` object should implement the following methods: Implement the InferenceEngine::IExtension::getOpSets method if the extension contains custom layers. Read [Custom nGraph Operation](AddingNGraphOps.md) for more information. -To integrate execution kernels to the extension library, read [How to Implement Custom CPU Operations](CPU_Kernel.md). -To register a custom ONNX\* operator to the extension library, read [Custom ONNX Operators](Custom_ONNX_Ops.md). +To understand how to integrate execution kernels to the extension library, read the [documentation about development of custom CPU kernels](CPU_Kernel.md). 
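For context on the Custom_ONNX_Ops.md change above (`find_package(ngraph ...)` replaced by `find_package(OpenVINO REQUIRED COMPONENTS ONNX)`), a consumer CMakeLists.txt written against the new package would presumably look roughly like the sketch below. The project name, target name, and source file are illustrative and not taken from the patch; the component and imported target names are the ones documented in the new OpenVINOConfig.cmake.in introduced earlier in this diff.

```cmake
cmake_minimum_required(VERSION 3.13)
project(onnx_custom_op)

# New-style lookup: a single OpenVINO package with components, instead of
# separate ngraph / InferenceEngine packages.
find_package(OpenVINO REQUIRED COMPONENTS ONNX)

add_library(onnx_custom_op SHARED onnx_custom_op.cpp)

# openvino::core and openvino::frontend::onnx replace ngraph::ngraph and
# ngraph::onnx_ngraph_frontend from the pre-2022 layout.
target_link_libraries(onnx_custom_op PRIVATE openvino::core openvino::frontend::onnx)
```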
+ +To understand how to register custom ONNX operator to the extension library, read the [documentation about custom ONNX operators](Custom_ONNX_Ops.md). diff --git a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md index d9fd809f8e4227..f206c2c0bcb41e 100644 --- a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md +++ b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md @@ -4,7 +4,7 @@ The GPU codepath abstracts many details about OpenCL\*. You need to provide the There are two options of using the custom operation configuration file: -* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `/deployment_tools/inference_engine/bin/intel64/{Debug/Release}` folder +* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `/runtime/bin` folder * Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom operations to the plugin: @snippet snippets/GPU_Kernel.cpp part0 diff --git a/docs/IE_DG/Extensibility_DG/VPU_Kernel.md b/docs/IE_DG/Extensibility_DG/VPU_Kernel.md index 033097598317bf..4dca14ce50233e 100644 --- a/docs/IE_DG/Extensibility_DG/VPU_Kernel.md +++ b/docs/IE_DG/Extensibility_DG/VPU_Kernel.md @@ -15,18 +15,18 @@ To customize your topology with an OpenCL layer, follow the steps below: > **NOTE:** OpenCL compiler, targeting Intel® Neural Compute Stick 2 for the SHAVE* processor only, is redistributed with OpenVINO. OpenCL support is provided by ComputeAorta*, and is distributed under a license agreement between Intel® and Codeplay* Software Ltd. -The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `/deployment_tools/tools/cl_compiler`. +The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `/tools/cl_compiler`. > **NOTE:** By design, custom OpenCL layers support any OpenCL kernels written with 1.2 version assumed. It also supports half float extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs. 1. Prior to running a compilation, make sure that the following variables are set: - * `SHAVE_MA2X8XLIBS_DIR=/deployment_tools/tools/cl_compiler/lib/` - * `SHAVE_LDSCRIPT_DIR=/deployment_tools/tools/cl_compiler/ldscripts/` - * `SHAVE_MYRIAD_LD_DIR=/deployment_tools/tools/cl_compiler/bin/` - * `SHAVE_MOVIASM_DIR=/deployment_tools/tools/cl_compiler/bin/` + * `SHAVE_MA2X8XLIBS_DIR=/tools/cl_compiler/lib/` + * `SHAVE_LDSCRIPT_DIR=/tools/cl_compiler/ldscripts/` + * `SHAVE_MYRIAD_LD_DIR=/tools/cl_compiler/bin/` + * `SHAVE_MOVIASM_DIR=/tools/cl_compiler/bin/` 2. Run the compilation with the command below. You should use `--strip-binary-header` to make an OpenCL runtime-agnostic binary runnable with the Inference Engine. 
```bash -cd /deployment_tools/tools/cl_compiler/bin +cd /tools/cl_compiler/bin ./clc --strip-binary-header custom_layer.cl -o custom_layer.bin ``` diff --git a/docs/IE_DG/Glossary.md b/docs/IE_DG/Glossary.md index 41e2b1b1dab0b9..706e71591852d8 100644 --- a/docs/IE_DG/Glossary.md +++ b/docs/IE_DG/Glossary.md @@ -19,7 +19,6 @@ Glossary {#openvino_docs_IE_DG_Glossary} | ELU | Exponential Linear rectification Unit | | FCN | Fully Convolutional Network | | FP | Floating Point | -| FPGA | Field-Programmable Gate Array | | GCC | GNU Compiler Collection | | GPU | Graphics Processing Unit | | HD | High Definition | diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md index 8588e00e5ceb62..90fa6bfb2cb17c 100644 --- a/docs/IE_DG/InferenceEngine_QueryAPI.md +++ b/docs/IE_DG/InferenceEngine_QueryAPI.md @@ -29,8 +29,6 @@ The function returns list of available devices, for example: ``` MYRIAD.1.2-ma2480 MYRIAD.1.4-ma2480 -FPGA.0 -FPGA.1 CPU GPU.0 GPU.1 diff --git a/docs/IE_DG/Int8Inference.md b/docs/IE_DG/Int8Inference.md index 889af6a53278b1..2577e7dc4ecab7 100644 --- a/docs/IE_DG/Int8Inference.md +++ b/docs/IE_DG/Int8Inference.md @@ -1,12 +1,5 @@ # Low-Precision 8-bit Integer Inference {#openvino_docs_IE_DG_Int8Inference} -## Table of Contents -1. [Supported devices](#supported-devices) -2. [Low-Precision 8-bit Integer Inference Workflow](#low-precision-8-bit-integer-inference-workflow) -3. [Prerequisites](#prerequisites) -4. [Inference](#inference) -5. [Results analysis](#results-analysis) - ## Supported devices Low-precision 8-bit inference is optimized for: @@ -24,34 +17,35 @@ Low-precision 8-bit inference is optimized for: ## Low-Precision 8-bit Integer Inference Workflow -8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for significant boost is a reduced accuracy. However, it is proved that an accuracy drop can be negligible and depends on task requirements, so that the application engineer can set up the maximum accuracy drop that is acceptable. +8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for significant boost is reduced accuracy. However, it is proved that an accuracy drop can be negligible and depends on task requirements, so that the application engineer can set up the maximum accuracy drop that is acceptable. For 8-bit integer computations, a model must be quantized. Quantized models can be downloaded from [Overview of OpenVINO™ Toolkit Intel's Pre-Trained Models](@ref omz_models_group_intel). If the model is not quantized, you can use the [Post-Training Optimization Tool](@ref pot_README) to quantize the model. The quantization process adds [FakeQuantize](../ops/quantization/FakeQuantize_1.md) layers on activations and weights for most layers. Read more about mathematical computations in the [Uniform Quantization with Fine-Tuning](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md). When you pass the quantized IR to the OpenVINO™ plugin, the plugin automatically recognizes it as a quantized model and performs 8-bit inference. 
Note, if you pass a quantized model to another plugin that does not support 8-bit inference but supports all operations from the model, the model is inferred in precision that this plugin supports. -In *Runtime stage* stage, the quantized model is loaded to the plugin. The plugin uses `Low Precision Transformation` component to update the model to infer it in low precision: - - Update `FakeQuantize` layers to have quantized output tensors in low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers have quantized input tensors in low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in the next `FakeQuantize` layer. - - Weights are quantized and stored in `Constant` layers. +In *Runtime stage*, the quantized model is loaded to the plugin. The plugin uses the `Low Precision Transformation` component to update the model to infer it in low precision: + - Update `FakeQuantize` layers to have quantized output tensors in a low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers have quantized input tensors in the low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in the next `FakeQuantize` layer. + - Quantize weights and store them in `Constant` layers. ## Prerequisites -Let's explore quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo): +Let's explore the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use the [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo): ```sh -./downloader.py --name resnet-50-tf --precisions FP16-INT8 +cd $INTEL_OPENVINO_DIR/deployment_tools/tools/model_downloader +./downloader.py --name resnet-50-tf --precisions FP16-INT8 --output_dir ``` -After that you should quantize model by the [Model Quantizer](@ref omz_tools_downloader) tool. +After that, you should quantize the model with the [Model Quantizer](@ref omz_tools_downloader) tool. For the dataset, you can choose to download the ImageNet dataset from [here](https://www.image-net.org/download.php). ```sh -./quantizer.py --model_dir public/resnet-50-tf --dataset_dir --precisions=FP16-INT8 +./quantizer.py --model_dir --name public/resnet-50-tf --dataset_dir --precisions=FP16-INT8 ``` ## Inference -The simplest way to infer the model and collect performance counters is [C++ Benchmark Application](../../inference-engine/samples/benchmark_app/README.md). +The simplest way to infer the model and collect performance counters is the [C++ Benchmark Application](../../inference-engine/samples/benchmark_app/README.md). 
```sh ./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir ``` -If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except last not quantized SoftMax) are executed in INT8 precision. +If you infer the model with the Inference Engine CPU plugin and collect performance counters, all operations (except the last non-quantized SoftMax) are executed in INT8 precision. ## Results analysis diff --git a/docs/IE_DG/Integrate_with_customer_application_new_API.md b/docs/IE_DG/Integrate_with_customer_application_new_API.md index 93482a9093887e..870d840c95cd21 100644 --- a/docs/IE_DG/Integrate_with_customer_application_new_API.md +++ b/docs/IE_DG/Integrate_with_customer_application_new_API.md @@ -35,7 +35,7 @@ Integration process includes the following steps: @snippet snippets/Integrate_with_customer_application_new_API.cpp part1 -**Or read the model from ONNX format** (.onnx and .prototxt are supported formats). You can find more information about the ONNX format support in the document [ONNX format support in the OpenVINO™](./ONNX_Support.md). +**Or read the model from ONNX format**. You can find more information about the ONNX format support in the document [ONNX format support in the OpenVINO™](./ONNX_Support.md). @snippet snippets/Integrate_with_customer_application_new_API.cpp part2 @@ -173,7 +173,7 @@ Note that casting `Blob` to `TBlob` via `std::dynamic_pointer_cast` is not the r ## Build Your Application For details about building your application, refer to the CMake files for the sample applications. -All samples source code is located in the `/openvino/inference_engine/samples` directory, where `INSTALL_DIR` is the OpenVINO™ installation directory. +All samples source code is located in the `/samples` directory, where `INSTALL_DIR` is the OpenVINO™ installation directory. ### CMake project creation @@ -193,14 +193,13 @@ build/ - build directory ``` cmake cmake_minimum_required(VERSION 3.0.0) project(project_name) -find_package(ngraph REQUIRED) -find_package(InferenceEngine REQUIRED) +find_package(OpenVINO REQUIRED) find_package(OpenCV REQUIRED) add_executable(${PROJECT_NAME} src/main.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE ${InferenceEngine_LIBRARIES} ${OpenCV_LIBS} ${NGRAPH_LIBRARIES}) +target_link_libraries(${PROJECT_NAME} PRIVATE openvino::runtime ${OpenCV_LIBS}) ``` 3. **To build your project** using CMake with the default build tools currently available on your machine, execute the following commands: -> **NOTE**: Make sure you set environment variables first by running `/bin/setupvars.sh` (or setupvars.bat for Windows)`. Otherwise the `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls. +> **NOTE**: Make sure you set environment variables first by running `/setupvars.sh` (or setupvars.bat for Windows)`. Otherwise the `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls. 
```sh cd build/ cmake ../project diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md index 48d1ea5c56cff0..ca360d0d06f1c0 100644 --- a/docs/IE_DG/Intro_to_Performance.md +++ b/docs/IE_DG/Intro_to_Performance.md @@ -34,7 +34,7 @@ Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README ## Using Caching API for first inference latency optimization Since with the 2021.4 release, Inference Engine provides an ability to enable internal caching of loaded networks. This can significantly reduce load network latency for some devices at application startup. -Internally caching uses plugin's Export/ImportNetwork flow, like it is done for [Compile tool](../../inference-engine/tools/compile_tool/README.md), using the regular ReadNetwork/LoadNetwork API. +Internally caching uses plugin's Export/ImportNetwork flow, like it is done for [Compile tool](../../tools/compile_tool/README.md), using the regular ReadNetwork/LoadNetwork API. Refer to the [Model Caching Overview](Model_caching_overview.md) for more detailed explanation. ## Using Async API diff --git a/docs/IE_DG/Legal_Information.md b/docs/IE_DG/Legal_Information.md deleted file mode 100644 index 3b39dba5810fa4..00000000000000 --- a/docs/IE_DG/Legal_Information.md +++ /dev/null @@ -1,12 +0,0 @@ -# Legal Information {#openvino_docs_IE_DG_Legal_Information} - -No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document.
-Intel disclaims all express and implied warranties, including without limitation, the implied warranties of merchantability, fitness for a particular purpose, and non-infringement, as well as any warranty arising from course of performance, course of dealing, or usage in trade.
-This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps.
-The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request.
-Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [www.intel.com/design/literature.htm](http://www.intel.com/design/literature.htm).
-Intel, Intel logo, Intel Core, VTune, Xeon are trademarks of Intel Corporation in the U.S. and other countries.
-\* Other names and brands may be claimed as the property of others.
-Copyright © 2016-2018 Intel Corporation.
-This software and the related documents are Intel copyrighted materials, and your use of them is governed by the express license under which they were provided to you (License). Unless the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related documents without Intel's prior written permission.
-This software and the related documents are provided as is, with no express or implied warranties, other than those that are expressly stated in the License.
diff --git a/docs/IE_DG/Model_caching_overview.md b/docs/IE_DG/Model_caching_overview.md index 10d3d6cf99e302..d480d7626d9859 100644 --- a/docs/IE_DG/Model_caching_overview.md +++ b/docs/IE_DG/Model_caching_overview.md @@ -20,7 +20,7 @@ As described in [Inference Engine Developer Guide](Deep_Learning_Inference_Engin Step #5 can potentially perform several time-consuming device-specific optimizations and network compilations, and such delays can lead to bad user experience on application startup. To avoid this, some devices offer -Import/Export network capability, and it is possible to either use [Compile tool](../../inference-engine/tools/compile_tool/README.md) +Import/Export network capability, and it is possible to either use [Compile tool](../../tools/compile_tool/README.md) or enable model caching to export compiled network automatically. Reusing cached networks can significantly reduce load network time. diff --git a/docs/IE_DG/Samples_Overview.md b/docs/IE_DG/Samples_Overview.md index f9e21cf5e4dcce..db989aac76f596 100644 --- a/docs/IE_DG/Samples_Overview.md +++ b/docs/IE_DG/Samples_Overview.md @@ -3,9 +3,9 @@ The Inference Engine sample applications are simple console applications that show how to utilize specific Inference Engine capabilities within an application, assist developers in executing specific tasks such as loading a model, running inference, querying specific device capabilities and etc. After installation of Intel® Distribution of OpenVINO™ toolkit, С, C++ and Python* sample applications are available in the following directories, respectively: -* `/inference_engine/samples/c` -* `/inference_engine/samples/cpp` -* `/inference_engine/samples/python` +* `/samples/c` +* `/samples/cpp` +* `/samples/python` Inference Engine sample applications include the following: @@ -64,7 +64,7 @@ The officially supported Linux* build environment is the following: > **NOTE**: For building samples from the open-source version of OpenVINO™ toolkit, see the [build instructions on GitHub](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). -To build the C or C++ sample applications for Linux, go to the `/inference_engine/samples/c` or `/inference_engine/samples/cpp` directory, respectively, and run the `build_samples.sh` script: +To build the C or C++ sample applications for Linux, go to the `/samples/c` or `/samples/cpp` directory, respectively, and run the `build_samples.sh` script: ```sh build_samples.sh ``` @@ -91,11 +91,11 @@ cd build 3. Run CMake to generate the Make files for release or debug configuration. For example, for C++ samples: - For release configuration: ```sh - cmake -DCMAKE_BUILD_TYPE=Release /inference_engine/samples/cpp + cmake -DCMAKE_BUILD_TYPE=Release /samples/cpp ``` - For debug configuration: ```sh - cmake -DCMAKE_BUILD_TYPE=Debug /inference_engine/samples/cpp + cmake -DCMAKE_BUILD_TYPE=Debug /samples/cpp ``` 4. Run `make` to build the samples: ```sh @@ -109,12 +109,12 @@ for the debug configuration — in `/intel64/Debug/`. The recommended Windows* build environment is the following: * Microsoft Windows* 10 -* Microsoft Visual Studio* 2017, or 2019 +* Microsoft Visual Studio* 2017, or 2019. Make sure that C++ CMake tools for Windows is [enabled](https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=msvc-160#:~:text=The%20Visual%20C%2B%2B%20Tools%20for,Visual%20Studio%20generators%20are%20supported). 
* CMake* version 3.10 or higher > **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14. -To build the C or C++ sample applications on Windows, go to the `\inference_engine\samples\c` or `\inference_engine\samples\cpp` directory, respectively, and run the `build_samples_msvc.bat` batch file: +To build the C or C++ sample applications on Windows, go to the `\samples\c` or `\samples\cpp` directory, respectively, and run the `build_samples_msvc.bat` batch file: ```sh build_samples_msvc.bat ``` @@ -123,7 +123,7 @@ By default, the script automatically detects the highest Microsoft Visual Studio a solution for a sample code. Optionally, you can also specify the preferred Microsoft Visual Studio version to be used by the script. Supported versions are `VS2017` and `VS2019`. For example, to build the C++ samples using the Microsoft Visual Studio 2017, use the following command: ```sh -\inference_engine\samples\cpp\build_samples_msvc.bat VS2017 +\samples\cpp\build_samples_msvc.bat VS2017 ``` Once the build is completed, you can find sample binaries in the following folders: @@ -144,7 +144,7 @@ The officially supported macOS* build environment is the following: > **NOTE**: For building samples from the open-source version of OpenVINO™ toolkit, see the [build instructions on GitHub](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). -To build the C or C++ sample applications for macOS, go to the `/inference_engine/samples/c` or `/inference_engine/samples/cpp` directory, respectively, and run the `build_samples.sh` script: +To build the C or C++ sample applications for macOS, go to the `/samples/c` or `/samples/cpp` directory, respectively, and run the `build_samples.sh` script: ```sh build_samples.sh ``` @@ -177,11 +177,11 @@ cd build 3. Run CMake to generate the Make files for release or debug configuration. For example, for C++ samples: - For release configuration: ```sh - cmake -DCMAKE_BUILD_TYPE=Release /inference_engine/samples/cpp + cmake -DCMAKE_BUILD_TYPE=Release /samples/cpp ``` - For debug configuration: ```sh - cmake -DCMAKE_BUILD_TYPE=Debug /inference_engine/samples/cpp + cmake -DCMAKE_BUILD_TYPE=Debug /samples/cpp ``` 4. Run `make` to build the samples: ```sh @@ -199,7 +199,7 @@ Before running compiled binary files, make sure your application can find the Inference Engine and OpenCV libraries. Run the `setupvars` script to set all necessary environment variables: ```sh -source /bin/setupvars.sh +source /setupvars.sh ``` **(Optional)**: The OpenVINO environment variables are removed when you close the @@ -212,7 +212,7 @@ vi /.bashrc 2. Add this line to the end of the file: ```sh -source /opt/intel/openvino_2021/bin/setupvars.sh +source /opt/intel/openvino_2022/setupvars.sh ``` 3. Save and close the file: press the **Esc** key, type `:wq` and press the **Enter** key. @@ -228,7 +228,7 @@ Before running compiled binary files, make sure your application can find the Inference Engine and OpenCV libraries. Use the `setupvars` script, which sets all necessary environment variables: ```sh -\bin\setupvars.bat +\setupvars.bat ``` To debug or run the samples on Windows in Microsoft Visual Studio, make sure you @@ -240,7 +240,7 @@ For example, for the **Debug** configuration, go to the project's variable in the **Environment** field to the following: ```sh -PATH=\deployment_tools\inference_engine\bin\intel64\Debug;\opencv\bin;%PATH% +PATH=\runtime\bin;\opencv\bin;%PATH% ``` where `` is the directory in which the OpenVINO toolkit is installed. 
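Referring back to the caching passages in `Intro_to_Performance.md` and `Model_caching_overview.md` above, a minimal sketch of enabling the 2021.4+ model cache might look like the following; the cache directory, model path, and device name are assumptions for illustration:

```cpp
#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    // Point the runtime at a cache directory. For devices that support import/export,
    // the first LoadNetwork call populates the cache; later calls reuse the compiled blob.
    core.SetConfig({{CONFIG_KEY(CACHE_DIR), "model_cache"}});
    auto network = core.ReadNetwork("model.xml");
    auto executable = core.LoadNetwork(network, "GPU");
    (void)executable;
    return 0;
}
```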
diff --git a/docs/IE_DG/ShapeInference.md b/docs/IE_DG/ShapeInference.md index dcc4b5c3f8837b..a265f2e9703e2e 100644 --- a/docs/IE_DG/ShapeInference.md +++ b/docs/IE_DG/ShapeInference.md @@ -33,7 +33,7 @@ If a model has a hard-coded batch dimension, use `InferenceEngine::CNNNetwork::s Inference Engine takes three kinds of a model description as an input, which are converted into an `InferenceEngine::CNNNetwork` object: 1. [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) through `InferenceEngine::Core::ReadNetwork` -2. [ONNX model](../IE_DG/OnnxImporterTutorial.md) through `InferenceEngine::Core::ReadNetwork` +2. [ONNX model](../IE_DG/ONNX_Support.md) through `InferenceEngine::Core::ReadNetwork` 3. [nGraph function](../nGraph_DG/nGraph_dg.md) through the constructor of `InferenceEngine::CNNNetwork` `InferenceEngine::CNNNetwork` keeps an `ngraph::Function` object with the model description internally. diff --git a/docs/IE_DG/Tools_Overview.md b/docs/IE_DG/Tools_Overview.md index f0741105387617..e3acfa7fb483b7 100644 --- a/docs/IE_DG/Tools_Overview.md +++ b/docs/IE_DG/Tools_Overview.md @@ -6,11 +6,11 @@ The OpenVINO™ toolkit installation includes the following tools: |Tool | Location in the Installation Directory| |-----------------------------------------------------------------------------|---------------------------------------| -|[Accuracy Checker Tool](@ref omz_tools_accuracy_checker) | `/deployment_tools/tools/open_model_zoo/tools/accuracy_checker`| -|[Post-Training Optimization Tool](@ref pot_README) | `/deployment_tools/tools/post_training_optimization_toolkit`| -|[Model Downloader](@ref omz_tools_downloader) | `/deployment_tools/tools/model_downloader`| -|[Cross Check Tool](../../inference-engine/tools/cross_check_tool/README.md) | `/deployment_tools/tools/cross_check_tool`| -|[Compile Tool](../../inference-engine/tools/compile_tool/README.md) | `/deployment_tools/inference_engine/lib/intel64/`| +|[Accuracy Checker Tool](@ref omz_tools_accuracy_checker) | `/tools/accuracy_checker`| +|[Post-Training Optimization Tool](@ref pot_README) | `/tools/post_training_optimization_toolkit`| +|[Model Downloader](@ref omz_tools_downloader) | `/extras/open_model_zoo/tools/downloader`| +|[Cross Check Tool](../../tools/cross_check_tool/README.md) | `/tools/cross_check_tool`| +|[Compile Tool](../../tools/compile_tool/README.md) | `/tools/compile_tool`| ## See Also diff --git a/docs/IE_DG/inference_engine_intro.md b/docs/IE_DG/inference_engine_intro.md index 89d80654fe4480..3ad44b99144736 100644 --- a/docs/IE_DG/inference_engine_intro.md +++ b/docs/IE_DG/inference_engine_intro.md @@ -84,9 +84,9 @@ Make sure those libraries are in your computer's path or in the place you pointe * Windows: `PATH` * macOS: `DYLD_LIBRARY_PATH` -On Linux and macOS, use the script `bin/setupvars.sh` to set the environment variables. +On Linux and macOS, use the script `setupvars.sh` to set the environment variables. -On Windows, run the `bin\setupvars.bat` batch file to set the environment variables. +On Windows, run the `setupvars.bat` batch file to set the environment variables. To learn more about supported devices and corresponding plugins, see the [Supported Devices](supported_plugins/Supported_Devices.md) chapter. 
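As a companion to the `ShapeInference.md` passage above on the three accepted model descriptions, a minimal sketch is given below; the file names are placeholders and the trivial nGraph function exists only to keep the example self-contained:

```cpp
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

int main() {
    InferenceEngine::Core core;

    // 1. Intermediate Representation: weights are taken from the .bin file next to the .xml
    InferenceEngine::CNNNetwork from_ir = core.ReadNetwork("model.xml");

    // 2. ONNX model read directly by the same call
    InferenceEngine::CNNNetwork from_onnx = core.ReadNetwork("model.onnx");

    // 3. nGraph function passed to the CNNNetwork constructor (a trivial Parameter->Relu graph)
    auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 224, 224});
    auto relu = std::make_shared<ngraph::opset1::Relu>(param);
    auto result = std::make_shared<ngraph::opset1::Result>(relu);
    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{param});
    InferenceEngine::CNNNetwork from_ngraph(function);

    (void)from_ir; (void)from_onnx; (void)from_ngraph;
    return 0;
}
```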
diff --git a/docs/IE_DG/supported_plugins/CPU.md b/docs/IE_DG/supported_plugins/CPU.md index 8f75a792adeeb2..12b005099ba092 100644 --- a/docs/IE_DG/supported_plugins/CPU.md +++ b/docs/IE_DG/supported_plugins/CPU.md @@ -105,17 +105,18 @@ These are general options, also supported by other plugins: | Parameter name | Parameter values | Default | Description | | :--- | :--- | :--- | :----------------------------------------------------------------------------------------------------------------------------| -| KEY_EXCLUSIVE_ASYNC_REQUESTS | YES/NO | NO | Forces async requests (also from different executable networks) to execute serially. This prevents potential oversubscription| -| KEY_PERF_COUNT | YES/NO | NO | Enables gathering performance counters | +| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES`/`NO` | `NO` | Forces async requests (also from different executable networks) to execute serially. This prevents potential oversubscription| +| `KEY_PERF_COUNT` | `YES`/`NO` | `NO` | Enables gathering performance counters | CPU-specific settings: -| Parameter name | Parameter values | Default | Description | -| :--- | :--- | :--- | :--- | -| KEY_CPU_THREADS_NUM | positive integer values| 0 | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores| -| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. | -| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes.
KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.
KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode.
Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.| -| KEY_ENFORCE_BF16 | YES/NO| YES | The name for setting to execute in bfloat16 precision whenever it is possible. This option lets plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. Such option does not guarantee accuracy of the network, you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. | + +| Parameter name | Parameter values | Default | Description | +| :--- | :--- | :--- |:-----------------------------------------------------------------------------| +| `KEY_CPU_THREADS_NUM` | `positive integer values`| `0` | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores| +| `KEY_CPU_BIND_THREAD` | `YES`/`NUMA`/`NO` | `YES` | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. | +| `KEY_CPU_THROUGHPUT_STREAMS` | `KEY_CPU_THROUGHPUT_NUMA`, `KEY_CPU_THROUGHPUT_AUTO`, or `positive integer values`| `1` | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes.
`KEY_CPU_THROUGHPUT_NUMA` creates as many streams as needed to accommodate NUMA and avoid associated penalties.
`KEY_CPU_THROUGHPUT_AUTO` creates the bare minimum of streams needed to improve performance; this is the most portable option if you don't know how many cores your target machine has (and what the optimal number of streams would be). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode.
Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.| +| `KEY_ENFORCE_BF16` | `YES`/`NO`| `YES` | The name for setting to execute in bfloat16 precision whenever it is possible. This option lets plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. Such option does not guarantee accuracy of the network, you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. | > **NOTE**: To disable all internal threading, use the following set of configuration parameters: `KEY_CPU_THROUGHPUT_STREAMS=0`, `KEY_CPU_THREADS_NUM=1`, `KEY_CPU_BIND_THREAD=NO`. diff --git a/docs/IE_DG/supported_plugins/GPU.md b/docs/IE_DG/supported_plugins/GPU.md index cc12be98a121e1..ab84dfbac06a9f 100644 --- a/docs/IE_DG/supported_plugins/GPU.md +++ b/docs/IE_DG/supported_plugins/GPU.md @@ -99,23 +99,24 @@ The plugin supports the configuration parameters listed below. All parameters must be set before calling InferenceEngine::Core::LoadNetwork() in order to take effect. When specifying key values as raw strings (that is, when using Python API), omit the `KEY_` prefix. + | Parameter Name | Parameter Values | Default | Description | |---------------------|-----------------------------|-----------------|-----------------------------------------------------------| | `KEY_CACHE_DIR` | `""` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled | | `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference | | `KEY_CONFIG_FILE` | `" [ ...]"` | `""` | Load custom layer configuration files | -| `KEY_GPU_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. | -| `KEY_GPU_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | -| `KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs | -| `KEY_GPU_NV12_TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for nv12 input. If it's set to YES, then device graph will expect that user will set biplanar nv12 blob as input wich will be directly passed to device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, thus GPU graph have to expect single input | -| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | -| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.| -| `KEY_GPU_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while GPU plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of GPU networks that are optimized with multi-threading. | -| `KEY_GPU_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. | -| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY | -| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE | -| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release | -| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release | +| `KEY_GPU_PLUGIN_`
`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. | +| `KEY_GPU_PLUGIN_`
`THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | +| `KEY_CLDNN_ENABLE_`
`FP16_FOR_QUANTIZED_`
`MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs | +| `KEY_GPU_NV12_`
`TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for nv12 input. If it's set to YES, then the device graph will expect that the user will set a biplanar nv12 blob as input which will be directly passed to the device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, thus the GPU graph has to expect a single input | +| `KEY_GPU_THROUGHPUT_`
`STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates the bare minimum of streams needed to improve performance; this is the most portable option if you are not sure how many resources your target machine has (and what the optimal number of streams would be).
A positive integer value creates the requested number of streams. | +| `KEY_EXCLUSIVE_ASYNC_`
`REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.| +| `KEY_GPU_MAX_NUM_`
`THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for the GPU engine, e.g., JIT compilation of GPU kernels or CPU kernel processing within the GPU plugin. The default value is set as the number of maximum available threads in the host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as the maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while the GPU plugin is running. Note that setting this value to a lower number will affect not only the network loading time but also the CPU layers of GPU networks that are optimized with multi-threading. | +| `KEY_GPU_ENABLE_`
`LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. | +| `KEY_CLDNN_PLUGIN_`
`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY | +| `KEY_CLDNN_PLUGIN_`
`THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE | +| `KEY_CLDNN_GRAPH_`
`DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release | +| `KEY_CLDNN_SOURCES_`
`DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release | | `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers. **Deprecated**. Will be removed in the next release | | `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file. **Deprecated**. Will be removed in the next release | | `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release | diff --git a/docs/IE_DG/supported_plugins/HETERO.md b/docs/IE_DG/supported_plugins/HETERO.md index f2b7521457e294..3d965e109f24e7 100644 --- a/docs/IE_DG/supported_plugins/HETERO.md +++ b/docs/IE_DG/supported_plugins/HETERO.md @@ -23,7 +23,7 @@ If transmitting data from one part of a network to another part in heterogeneous In this case, you can define heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference. ## Annotation of Layers per Device and Default Fallback Policy -Default fallback policy decides which layer goes to which device automatically according to the support in dedicated plugins (FPGA, GPU, CPU, MYRIAD). +Default fallback policy decides which layer goes to which device automatically according to the support in dedicated plugins (GPU, CPU, MYRIAD). Another way to annotate a network is to set affinity manually using ngraph::Node::get_rt_info with key `"affinity"`: @@ -46,25 +46,16 @@ If you rely on the default affinity distribution, you can avoid calling In During loading of the network to heterogeneous plugin, network is divided to separate parts and loaded to dedicated plugins. Intermediate blobs between these sub graphs are allocated automatically in the most efficient way. -## Execution Precision -Precision for inference in heterogeneous plugin is defined by -* Precision of IR. -* Ability of final plugins to execute in precision defined in IR - -Examples: -* If you want to execute GPU with CPU fallback with FP16 on GPU, you need to use only FP16 IR. -* If you want to execute on FPGA with CPU fallback, you can use any precision for IR. The execution on FPGA is defined by bitstream, the execution on CPU happens in FP32. - Samples can be used with the following command: ```sh -./object_detection_sample_ssd -m /ModelSSD.xml -i /picture.jpg -d HETERO:FPGA,CPU +./object_detection_sample_ssd -m /ModelSSD.xml -i /picture.jpg -d HETERO:GPU,CPU ``` where: - `HETERO` stands for heterogeneous plugin -- `FPGA,CPU` points to fallback policy with priority on FPGA and fallback to CPU +- `GPU,CPU` points to fallback policy with priority on GPU and fallback to CPU -You can point more than two devices: `-d HETERO:FPGA,GPU,CPU` +You can point more than two devices: `-d HETERO:GPU,GPU,CPU` ## Analyzing Heterogeneous Execution After enabling of KEY_HETERO_DUMP_GRAPH_DOT config key, you can dump GraphViz* `.dot` files with annotations of devices per layer. diff --git a/docs/IE_DG/supported_plugins/MULTI.md b/docs/IE_DG/supported_plugins/MULTI.md index a3f7dc2afc9a89..cebc03ba135fdc 100644 --- a/docs/IE_DG/supported_plugins/MULTI.md +++ b/docs/IE_DG/supported_plugins/MULTI.md @@ -96,10 +96,8 @@ Notice that you can use the FP16 IR to work with multi-device (as CPU automatica Also notice that no demos are (yet) fully optimized for the multi-device, by means of supporting the OPTIMAL_NUMBER_OF_INFER_REQUESTS metric, using the GPU streams/throttling, and so on. 
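Referring back to the `HETERO.md` and `MULTI.md` passages above, the device-priority strings can also be used directly from the C++ API; a short sketch, assuming a GPU and a CPU are available and using a placeholder model path:

```cpp
#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");

    // Heterogeneous execution: GPU is tried first, unsupported layers fall back to CPU.
    auto hetero_exec = core.LoadNetwork(network, "HETERO:GPU,CPU");

    // Multi-device execution over the same two devices.
    auto multi_exec = core.LoadNetwork(network, "MULTI:GPU,CPU");

    // Query how many infer requests the multi-device configuration can keep busy,
    // as mentioned in the MULTI notes above.
    auto nireq = multi_exec.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    (void)hetero_exec; (void)nireq;
    return 0;
}
```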
## Video: MULTI Plugin -[![](https://img.youtube.com/vi/xbORYFEmrqU/0.jpg)](https://www.youtube.com/watch?v=xbORYFEmrqU) -\htmlonly + -\endhtmlonly ## See Also * [Supported Devices](Supported_Devices.md) diff --git a/docs/Legal_Information.md b/docs/Legal_Information.md index 2f3526f2902677..2936ae2a949665 100644 --- a/docs/Legal_Information.md +++ b/docs/Legal_Information.md @@ -1,22 +1,20 @@ # Legal Information {#openvino_docs_Legal_Information} -This software and the related documents are Intel copyrighted materials, and your use of them is governed by the express license (the “License”) under which they were provided to you. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document. Unless the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related documents without Intel's prior written permission. This software and the related documents are provided as is, with no express or implied warranties, other than those that are expressly stated in the License. Intel disclaims all express and implied warranties, including without limitation, the implied warranties of merchantability, fitness for a particular purpose, and non-infringement, as well as any warranty arising from course of performance, course of dealing, or usage in trade. - -This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps. The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request. Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [www.intel.com/design/literature.htm](https://www.intel.com/design/literature.htm). - Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex). - -Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. See backup for configuration details. No product or component can be absolutely secure. - -Your costs and results may vary. - + +Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. See backup for configuration details. No product or component can be absolutely secure. + +Your costs and results may vary. + Intel technologies may require enabled hardware, software or service activation. -© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others. +OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos. +© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. + ## OpenVINO™ Logo To build equity around the project, the OpenVINO logo was created for both Intel and community usage. The logo may only be used to represent the OpenVINO toolkit and offerings built using the OpenVINO toolkit. 
- + ## Logo Usage Guidelines The OpenVINO logo must be used in connection with truthful, non-misleading references to the OpenVINO toolkit, and for no other purpose. -Modification of the logo or use of any separate element(s) of the logo alone is not allowed. +Modification of the logo or use of any separate element(s) of the logo alone is not allowed. \ No newline at end of file diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index 2aed66ba719934..378d559f895805 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -1,136 +1,54 @@ # Model Optimizer Developer Guide {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide} +## Introduction + Model Optimizer is a cross-platform command-line tool that facilitates the transition between the training and deployment environment, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices. -Model Optimizer process assumes you have a network model trained using a supported deep learning framework. The scheme below illustrates the typical workflow for deploying a trained deep learning model: +Model Optimizer process assumes you have a network model trained using supported deep learning frameworks: Caffe*, TensorFlow*, Kaldi*, MXNet* or converted to the ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the network, which can be inferred with the [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md). + +> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place. + +The scheme below illustrates the typical workflow for deploying a trained deep learning model: ![](img/workflow_steps.png) -Model Optimizer produces an Intermediate Representation (IR) of the network, which can be read, loaded, and inferred with the Inference Engine. The Inference Engine API offers a unified API across a number of supported Intel® platforms. The Intermediate Representation is a pair of files describing the model: +The IR is a pair of files describing the model: * .xml - Describes the network topology * .bin - Contains the weights and biases binary data. -> **TIP**: You also can work with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). -> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare -> performance of deep learning models on various Intel® architecture -> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components. ->
-> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started. - -## What's New in the Model Optimizer in this Release? - -* Common changes: - * Implemented several optimization transformations to replace sub-graphs of operations with HSwish, Mish, Swish and SoftPlus operations. - * Model Optimizer generates IR keeping shape-calculating sub-graphs **by default**. Previously, this behavior was triggered if the "--keep_shape_ops" command line parameter was provided. The key is ignored in this release and will be deleted in the next release. To trigger the legacy behavior to generate an IR for a fixed input shape (folding ShapeOf operations and shape-calculating sub-graphs to Constant), use the "--static_shape" command line parameter. Changing model input shape using the Inference Engine API in runtime may fail for such an IR. - * Fixed Model Optimizer conversion issues resulted in non-reshapeable IR using the Inference Engine reshape API. - * Enabled transformations to fix non-reshapeable patterns in the original networks: - * Hardcoded Reshape - * In Reshape(2D)->MatMul pattern - * Reshape->Transpose->Reshape when the pattern can be fused to the ShuffleChannels or DepthToSpace operation - * Hardcoded Interpolate - * In Interpolate->Concat pattern - * Added a dedicated requirements file for TensorFlow 2.X as well as the dedicated install prerequisites scripts. - * Replaced the SparseToDense operation with ScatterNDUpdate-4. -* ONNX*: - * Enabled an ability to specify the model output **tensor** name using the "--output" command line parameter. - * Added support for the following operations: - * Acosh - * Asinh - * Atanh - * DepthToSpace-11, 13 - * DequantizeLinear-10 (zero_point must be constant) - * HardSigmoid-1,6 - * QuantizeLinear-10 (zero_point must be constant) - * ReduceL1-11, 13 - * ReduceL2-11, 13 - * Resize-11, 13 (except mode="nearest" with 5D+ input, mode="tf_crop_and_resize", and attributes exclude_outside and extrapolation_value with non-zero values) - * ScatterND-11, 13 - * SpaceToDepth-11, 13 -* TensorFlow*: - * Added support for the following operations: - * Acosh - * Asinh - * Atanh - * CTCLoss - * EuclideanNorm - * ExtractImagePatches - * FloorDiv -* MXNet*: - * Added support for the following operations: - * Acosh - * Asinh - * Atanh -* Kaldi*: - * Fixed bug with ParallelComponent support. Now it is fully supported with no restrictions. - -> **NOTE:** -> [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019). 
- -## Table of Contents - -* [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md) - * [Configuring Model Optimizer](prepare_model/Config_Model_Optimizer.md) - * [Converting a Model to Intermediate Representation (IR)](prepare_model/convert_model/Converting_Model.md) - * [Converting a Model Using General Conversion Parameters](prepare_model/convert_model/Converting_Model_General.md) - * [Converting Your Caffe* Model](prepare_model/convert_model/Convert_Model_From_Caffe.md) - * [Converting Your TensorFlow* Model](prepare_model/convert_model/Convert_Model_From_TensorFlow.md) - * [Converting BERT from TensorFlow](prepare_model/convert_model/tf_specific/Convert_BERT_From_Tensorflow.md) - * [Converting GNMT from TensorFlow](prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md) - * [Converting YOLO from DarkNet to TensorFlow and then to IR](prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md) - * [Converting Wide and Deep Models from TensorFlow](prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md) - * [Converting FaceNet from TensorFlow](prepare_model/convert_model/tf_specific/Convert_FaceNet_From_Tensorflow.md) - * [Converting DeepSpeech from TensorFlow](prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md) - * [Converting Language Model on One Billion Word Benchmark from TensorFlow](prepare_model/convert_model/tf_specific/Convert_lm_1b_From_Tensorflow.md) - * [Converting Neural Collaborative Filtering Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md) - * [Converting TensorFlow* Object Detection API Models](prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md) - * [Converting TensorFlow*-Slim Image Classification Model Library Models](prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md) - * [Converting CRNN Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md) - * [Converting Your MXNet* Model](prepare_model/convert_model/Convert_Model_From_MxNet.md) - * [Converting a Style Transfer Model from MXNet](prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md) - * [Converting Your Kaldi* Model](prepare_model/convert_model/Convert_Model_From_Kaldi.md) - * [Converting Your ONNX* Model](prepare_model/convert_model/Convert_Model_From_ONNX.md) - * [Converting Faster-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Faster_RCNN.md) - * [Converting Mask-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Mask_RCNN.md) - * [Converting GPT2 ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_GPT2.md) - * [Converting Your PyTorch* Model](prepare_model/convert_model/Convert_Model_From_PyTorch.md) - * [Converting F3Net PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_F3Net.md) - * [Converting QuartzNet PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md) - * [Converting YOLACT PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md) - * [Model Optimizations Techniques](prepare_model/Model_Optimization_Techniques.md) - * [Cutting parts of the model](prepare_model/convert_model/Cutting_Model.md) - * [Sub-graph Replacement in Model Optimizer](prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) - * [Supported Framework Layers](prepare_model/Supported_Frameworks_Layers.md) - * 
[Intermediate Representation and Operation Sets](IR_and_opsets.md) - * [Operations Specification](../ops/opset.md) - * [Intermediate Representation suitable for INT8 inference](prepare_model/convert_model/IR_suitable_for_INT8_inference.md) - * [Model Optimizer Extensibility](prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) - * [Extending Model Optimizer with New Primitives](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) - * [Extending Model Optimizer with Caffe Python Layers](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md) - * [Extending Model Optimizer with Custom MXNet* Operations](prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md) - * [Legacy Mode for Caffe* Custom Layers](prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md) - * [Model Optimizer Frequently Asked Questions](prepare_model/Model_Optimizer_FAQ.md) - -* [Known Issues](Known_Issues_Limitations.md) - -**Typical Next Step:** [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md) - -## Video: Model Optimizer Concept - -[![](https://img.youtube.com/vi/Kl1ptVb7aI8/0.jpg)](https://www.youtube.com/watch?v=Kl1ptVb7aI8) -\htmlonly - -\endhtmlonly - -## Video: Model Optimizer Basic Operation -[![](https://img.youtube.com/vi/BBt1rseDcy0/0.jpg)](https://www.youtube.com/watch?v=BBt1rseDcy0) -\htmlonly - -\endhtmlonly +Below is a simple command running Model Optimizer to generate an IR for the input model: + +```sh +python3 mo.py --input_model INPUT_MODEL +``` +To learn about all Model Optimizer parameters and conversion technics, see the [Converting a Model to IR](prepare_model/convert_model/Converting_Model.md) page. + +> **TIP**: You can quick start with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref +> openvino_docs_get_started_get_started_dl_workbench) (DL Workbench). +> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is the OpenVINO™ toolkit UI that enables you to +> import a model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for +> deployment on various Intel® platforms. + +## Videos + + + + + + + + + + + + +
+[Embedded video table: "Model Optimizer Concept" (Duration: 3:56), "Model Optimizer Basic Operation" (Duration: 2:57), "Choosing the Right Precision" (Duration: 4:18)]
-## Video: Choosing the Right Precision -[![](https://img.youtube.com/vi/RF8ypHyiKrY/0.jpg)](https://www.youtube.com/watch?v=RF8ypHyiKrY) -\htmlonly - -\endhtmlonly diff --git a/docs/MO_DG/img/small_IR_graph_demonstration.png b/docs/MO_DG/img/small_IR_graph_demonstration.png index 91a3fe385ae32f..332c11fdb65b66 100644 --- a/docs/MO_DG/img/small_IR_graph_demonstration.png +++ b/docs/MO_DG/img/small_IR_graph_demonstration.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8ae479880ab43cdb12eeb2fbaaf3b7861f786413c583eeba906c5fdf4b66730 -size 30696 +oid sha256:e8a86ea362473121a266c0ec1257c8d428a4bb6438fecdc9d4a4f1ff5cfc9047 +size 26220 diff --git a/docs/MO_DG/img/workflow_steps.png b/docs/MO_DG/img/workflow_steps.png index 6bf780127ad14c..fee04b7cb33ebe 100644 --- a/docs/MO_DG/img/workflow_steps.png +++ b/docs/MO_DG/img/workflow_steps.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e22bc22d614c7335ae461a8ce449ea8695973d755faca718cf74b95972c94e2 -size 19773 +oid sha256:5281f26cbaa468dc4cafa4ce2fde35d338fe0f658bbb796abaaf793e951939f6 +size 13943 diff --git a/docs/MO_DG/prepare_model/Config_Model_Optimizer.md b/docs/MO_DG/prepare_model/Config_Model_Optimizer.md index 9b978d750aa586..186b8ddabd5b4e 100644 --- a/docs/MO_DG/prepare_model/Config_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/Config_Model_Optimizer.md @@ -1,8 +1,6 @@ -# Configuring the Model Optimizer {#openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer} +# Installing Model Optimizer Pre-Requisites {#openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer} -You must configure the Model Optimizer for the framework that was used to train -the model. This section tells you how to configure the Model Optimizer either -through scripts or by using a manual process. +Before running the Model Optimizer, you must install the Model Optimizer pre-requisites for the framework that was used to train the model. This section tells you how to install the pre-requisites either through scripts or by using a manual process. ## Using Configuration Scripts @@ -12,7 +10,7 @@ dependencies and provide the fastest and easiest way to configure the Model Optimizer. To configure all three frameworks, go to the -`/deployment_tools/model_optimizer/install_prerequisites` +`/tools/model_optimizer/install_prerequisites` directory and run: * For Linux\* OS: @@ -37,7 +35,7 @@ install_prerequisites.bat ``` To configure a specific framework, go to the -`/deployment_tools/model_optimizer/install_prerequisites` +`/tools/model_optimizer/install_prerequisites` directory and run: * For Caffe\* on Linux: @@ -103,7 +101,7 @@ framework at a time. 1. Go to the Model Optimizer directory: ```shell -cd /deployment_tools/model_optimizer/ +cd /tools/model_optimizer/ ``` 2. **Strongly recommended for all global Model Optimizer dependency installations**: Create and activate a virtual environment. While not required, this step is @@ -154,6 +152,10 @@ pip3 install -r requirements_onnx.txt ``` ## Using the protobuf Library in the Model Optimizer for Caffe\* +
+ Click to expand + + These procedures require: @@ -166,7 +168,7 @@ By default, the library executes pure Python\* language implementation, which is slow. These steps show how to use the faster C++ implementation of the protobuf library on Windows OS or Linux OS. -### Using the protobuf Library on Linux\* OS +#### Using the protobuf Library on Linux\* OS To use the C++ implementation of the protobuf library on Linux, it is enough to set up the environment variable: @@ -174,12 +176,12 @@ set up the environment variable: export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp ``` -### Using the protobuf Library on Windows\* OS +#### Using the protobuf Library on Windows\* OS On Windows, pre-built protobuf packages for Python versions 3.4, 3.5, 3.6, and 3.7 are provided with the installation package and can be found in the -`\deployment_tools\model_optimizer\install_prerequisites` +`\tools\model_optimizer\install_prerequisites` folder. Please note that they are not installed with the `install_prerequisites.bat` installation script due to possible issues with `pip`, and you can install them at your own discretion. Make sure @@ -196,7 +198,7 @@ To install the protobuf package: 1. Open the command prompt as administrator. 2. Go to the `install_prerequisites` folder of the OpenVINO toolkit installation directory: ```sh -cd \deployment_tools\model_optimizer\install_prerequisites +cd \tools\model_optimizer\install_prerequisites ``` 3. Run the following command to install the protobuf for Python 3.6. If @@ -262,6 +264,8 @@ python3 -m easy_install dist/protobuf-3.6.1-py3.6-win-amd64.egg set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp ``` +
+ ## See Also * [Converting a Model to Intermediate Representation (IR)](convert_model/Converting_Model.md) diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index bb599cf93b5632..cd41e9da21d0a8 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -28,7 +28,7 @@ For example, to add the description of the `CustomReshape` layer, which is an ar 2. Generate a new parser: ```shell -cd /deployment_tools/model_optimizer/mo/front/caffe/proto +cd /tools/model_optimizer/mo/front/caffe/proto python3 generate_caffe_pb2.py --input_proto /src/caffe/proto/caffe.proto ``` where `PATH_TO_CUSTOM_CAFFE` is the path to the root directory of custom Caffe\*. @@ -66,7 +66,7 @@ The mean file that you provide for the Model Optimizer must be in a `.binaryprot #### 7. What does the message "Invalid proto file: there is neither 'layer' nor 'layers' top-level messages" mean? -The structure of any Caffe\* topology is described in the `caffe.proto` file of any Caffe version. For example, in the Model Optimizer, you can find the following proto file, used by default: `/deployment_tools/model_optimizer/mo/front/caffe/proto/my_caffe.proto`. There you can find the structure: +The structure of any Caffe\* topology is described in the `caffe.proto` file of any Caffe version. For example, in the Model Optimizer, you can find the following proto file, used by default: `/tools/model_optimizer/mo/front/caffe/proto/my_caffe.proto`. There you can find the structure: ``` message NetParameter { // ... some other parameters @@ -81,7 +81,7 @@ This means that any topology should contain layers as top-level structures in `p #### 8. What does the message "Old-style inputs (via 'input_dims') are not supported. Please specify inputs via 'input_shape'" mean? -The structure of any Caffe\* topology is described in the `caffe.proto` file for any Caffe version. For example, in the Model Optimizer you can find the following `.proto` file, used by default: `/deployment_tools/model_optimizer/mo/front/caffe/proto/my_caffe.proto`. There you can find the structure: +The structure of any Caffe\* topology is described in the `caffe.proto` file for any Caffe version. For example, in the Model Optimizer you can find the following `.proto` file, used by default: `/tools/model_optimizer/mo/front/caffe/proto/my_caffe.proto`. There you can find the structure: ```sh message NetParameter { diff --git a/docs/MO_DG/prepare_model/Prepare_Trained_Model.md b/docs/MO_DG/prepare_model/Prepare_Trained_Model.md deleted file mode 100644 index a74d1b789a2f34..00000000000000 --- a/docs/MO_DG/prepare_model/Prepare_Trained_Model.md +++ /dev/null @@ -1,63 +0,0 @@ -# Preparing and Optimizing Your Trained Model {#openvino_docs_MO_DG_prepare_model_Prepare_Trained_Model} - -Inference Engine enables _deploying_ your network model trained with any of supported deep learning frameworks: Caffe\*, TensorFlow\*, Kaldi\*, MXNet\* or converted to the ONNX\* format. To perform the inference, the Inference Engine does not operate with the original model, but with its Intermediate Representation (IR), which is optimized for execution on end-point target devices. To generate an IR for your trained model, the Model Optimizer tool is used. - -## How the Model Optimizer Works - -Model Optimizer loads a model into memory, reads it, builds the internal representation of the model, optimizes it, and produces the Intermediate Representation. 
Intermediate Representation is the only format the Inference Engine accepts. - -> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place. - -Model Optimizer has two main purposes: - -* **Produce a valid Intermediate Representation**. If this main conversion artifact is not valid, the Inference Engine cannot run. The primary responsibility of the Model Optimizer is to produce the two files (`.xml` and `.bin`) that form the Intermediate Representation. -* **Produce an optimized Intermediate Representation**. Pre-trained models contain layers that are important for training, such as the `Dropout` layer. These layers are useless during inference and might increase the inference time. In many cases, these operations can be automatically removed from the resulting Intermediate Representation. However, if a group of operations can be represented as a single mathematical operation, and thus as a single operation node in a model graph, the Model Optimizer recognizes such patterns and replaces this group of operation nodes with the only one operation. The result is an Intermediate Representation that has fewer operation nodes than the original model. This decreases the inference time. - -To produce a valid Intermediate Representation, the Model Optimizer must be able to read the original model operations, handle their properties and represent them in Intermediate Representation format, while maintaining validity of the resulting Intermediate Representation. The resulting model consists of operations described in the [Operations Specification](../../ops/opset.md). - -## What You Need to Know about Your Model - -Many common layers exist across known frameworks and neural network topologies. Examples of these layers are `Convolution`, `Pooling`, and `Activation`. To read the original model and produce the Intermediate Representation of a model, the Model Optimizer must be able to work with these layers. - -The full list of them depends on the framework and can be found in the [Supported Framework Layers](Supported_Frameworks_Layers.md) section. If your topology contains only layers from the list of layers, as is the case for the topologies used by most users, the Model Optimizer easily creates the Intermediate Representation. After that you can proceed to work with the Inference Engine. - -However, if you use a topology with layers that are not recognized by the Model Optimizer out of the box, see [Custom Layers in the Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md) to learn how to work with custom layers. - -## Model Optimizer Directory Structure - -After installation with OpenVINO™ toolkit or Intel® Deep Learning Deployment Toolkit, the Model Optimizer folder has the following structure (some directories omitted for clarity): -``` -|-- model_optimizer - |-- extensions - |-- front - Front-End framework agnostic transformations (operations output shapes are not defined yet). 
- |-- caffe - Front-End Caffe-specific transformations and Caffe layers extractors - |-- CustomLayersMapping.xml.example - example of file for registering custom Caffe layers (compatible with the 2017R3 release) - |-- kaldi - Front-End Kaldi-specific transformations and Kaldi operations extractors - |-- mxnet - Front-End MxNet-specific transformations and MxNet symbols extractors - |-- onnx - Front-End ONNX-specific transformations and ONNX operators extractors - |-- tf - Front-End TensorFlow-specific transformations, TensorFlow operations extractors, sub-graph replacements configuration files. - |-- middle - Middle-End framework agnostic transformations (layers output shapes are defined). - |-- back - Back-End framework agnostic transformations (preparation for IR generation). - |-- mo - |-- back - Back-End logic: contains IR emitting logic - |-- front - Front-End logic: contains matching between Framework-specific layers and IR specific, calculation of output shapes for each registered layer - |-- graph - Graph utilities to work with internal IR representation - |-- middle - Graph transformations - optimizations of the model - |-- pipeline - Sequence of steps required to create IR for each framework - |-- utils - Utility functions - |-- tf_call_ie_layer - Source code that enables TensorFlow fallback in Inference Engine during model inference - |-- mo.py - Centralized entry point that can be used for any supported framework - |-- mo_caffe.py - Entry point particularly for Caffe - |-- mo_kaldi.py - Entry point particularly for Kaldi - |-- mo_mxnet.py - Entry point particularly for MXNet - |-- mo_onnx.py - Entry point particularly for ONNX - |-- mo_tf.py - Entry point particularly for TensorFlow -``` - -The following sections provide the information about how to use the Model Optimizer, from configuring the tool and generating an IR for a given model to customizing the tool for your needs: - -* [Configuring Model Optimizer](Config_Model_Optimizer.md) -* [Converting a Model to Intermediate Representation](convert_model/Converting_Model.md) -* [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md) -* [Model Optimization Techniques](Model_Optimization_Techniques.md) -* [Model Optimizer Frequently Asked Questions](Model_Optimizer_FAQ.md) diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index a09df51a56a34d..c01c71752210e6 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -67,6 +67,7 @@ Standard MXNet\* symbols: | _minus_scalar | No | | _mul_scalar | No | | _plus_scalar | No | +| _random_uniform | Operation provides sequence from uniform distribution, but exact values won't match. | | _rnn_param_concat | No | | _arange | No | | _contrib_AdaptiveAvgPooling2D | Converted to the Average Pooling with fixed paddings | @@ -272,6 +273,8 @@ Standard TensorFlow\* operations: | PlaceholderWithDefault | No | | Prod | No | | QueueDequeueUpToV2 | Supported only when it is part of a sub-graph of the special form | +| RandomUniform | No | +| RandomUniformInt | No | | Range | No | | Rank | No | | RealDiv | No | @@ -568,6 +571,7 @@ Standard ONNX\* operators: | RNN | No | | ROIAlign | No | | Range | No | +| RandomUniform | Operation provides sequence from uniform distribution, but exact values won't match. 
| | Reciprocal | No | | ReduceL1 | No | | ReduceL2 | No | diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index 4c257d1689ea23..229205f7b68166 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -38,7 +38,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert a Caffe\* model: -1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory. +1. Go to the `$INTEL_OPENVINO_DIR/tools/model_optimizer` directory. 2. Use the `mo.py` script to simply convert a model, specifying the path to the input model `.caffemodel` file and the path to an output directory with write permissions: ```sh python3 mo.py --input_model .caffemodel --output_dir diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md index 20f2511dcbf148..3aac41fbd67874 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md @@ -33,7 +33,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert a Kaldi\* model: -1. Go to the `/deployment_tools/model_optimizer` directory. +1. Go to the `/tools/model_optimizer` directory. 2. Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` or `.mdl` file and to an output directory where you have write permissions: ```sh python3 mo.py --input_model .nnet --output_dir diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md index 4b8c1816e8b318..6ac304aa5c236b 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md @@ -27,14 +27,12 @@ A summary of the steps for optimizing and deploying a model that was trained wit |SSD-ResNet-50| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/resnet50_ssd_512_voc0712_trainval.zip)| |SSD-VGG-16-300| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.5-beta/vgg16_ssd_300_voc0712_trainval.zip)| |SSD-Inception v3| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.7-alpha/ssd_inceptionv3_512_voc0712trainval.zip)| -|FCN8 (Semantic Segmentation)| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/fcn-xs), [Symbol](https://www.dropbox.com/sh/578n5cxej7ofd6m/AAA9SFCBN8R_uL2CnAd3WQ5ia/FCN8s_VGG16-symbol.json?dl=0), [Params](https://www.dropbox.com/sh/578n5cxej7ofd6m/AABHWZHCtA2P6iR6LUflkxb_a/FCN8s_VGG16-0019-cpu.params?dl=0)| |MTCNN part 1 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-0001.params)| |MTCNN part 2 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-symbol.json), 
[Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-0001.params)| |MTCNN part 3 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-0001.params)| |MTCNN part 4 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-0001.params)| |Lightened_moon| [Repo](https://github.com/tornadomeet/mxnet-face/tree/master/model/lightened_moon), [Symbol](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-symbol.json), [Params](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-0082.params)| |RNN-Transducer| [Repo](https://github.com/HawkAaron/mxnet-transducer) | -|word_lm| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/rnn/word_lm) | **Other supported topologies** @@ -45,7 +43,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert an MXNet\* model: -1. Go to the `/deployment_tools/model_optimizer` directory. +1. Go to the `/tools/model_optimizer` directory. 2. To convert an MXNet\* model contained in a `model-file-symbol.json` and `model-file-0000.params`, run the Model Optimizer launch script `mo.py`, specifying a path to the input model file and a path to an output directory with write permissions: ```sh python3 mo_mxnet.py --input_model model-file-0000.params --output_dir diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md index 79f740b55ecdd4..6ab9ef30e43782 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md @@ -59,7 +59,7 @@ The Model Optimizer process assumes you have an ONNX model that was directly dow To convert an ONNX\* model: -1. Go to the `/deployment_tools/model_optimizer` directory. +1. Go to the `/tools/model_optimizer` directory. 2. Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` file and an output directory where you have write permissions: ```sh python3 mo.py --input_model .onnx --output_dir diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md index 65f5c8fbbab1ba..d2d75aefb08541 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -29,7 +29,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert a Paddle\* model: -1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory. +1. Go to the `$INTEL_OPENVINO_DIR/tools/model_optimizer` directory. 2. 
Use the `mo.py` script to simply convert a model, specifying the framework, the path to the input model `.pdmodel` file and the path to an output directory with write permissions: ```sh python3 mo.py --input_model .pdmodel --output_dir --framework=paddle diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 7e29a7668b2f24..d5124fab21b0e6 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -37,7 +37,7 @@ Detailed information on how to convert models from the TensorFlow 1 Detection Model Zoo is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported. +Detailed information on how to convert models from the TensorFlow 1 Object Detection Models Zoo and TensorFlow 2 Object Detection Models Zoo is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models Zoo that are supported. | Model Name| TensorFlow 1 Object Detection API Models| | :------------- | -----:| @@ -178,7 +178,7 @@ There are three ways to store non-frozen TensorFlow models and load them to the To convert such a TensorFlow model: - 1. Go to the `/deployment_tools/model_optimizer` directory + 1. Go to the `/tools/model_optimizer` directory 2. Run the `mo_tf.py` script with the path to the checkpoint file to convert a model and an output directory where you have write permissions: * If input model is in `.pb` format:
@@ -200,7 +200,7 @@ python3 mo_tf.py --input_model .pbtxt --input_checkpoint /deployment_tools/model_optimizer` directory + 1. Go to the `/tools/model_optimizer` directory 2. Run the `mo_tf.py` script with a path to the MetaGraph `.meta` file and a writable output directory to convert a model:
```sh python3 mo_tf.py --input_meta_graph .meta --output_dir @@ -212,7 +212,7 @@ python3 mo_tf.py --input_meta_graph .meta --output_dir /deployment_tools/model_optimizer` directory + 1. Go to the `/tools/model_optimizer` directory 2. Run the `mo_tf.py` script with a path to the SavedModel directory and a writable output directory to convert a model:
```sh python3 mo_tf.py --saved_model_dir --output_dir @@ -251,7 +251,7 @@ Where: To convert a TensorFlow model: -1. Go to the `/deployment_tools/model_optimizer` directory +1. Go to the `/tools/model_optimizer` directory 2. Use the `mo_tf.py` script to simply convert a model with the path to the input model `.pb` file and a writable output directory: ```sh python3 mo_tf.py --input_model .pb --output_dir @@ -342,7 +342,7 @@ Below are the instructions on how to convert each of them. A model in the SavedModel format consists of a directory with a `saved_model.pb` file and two subfolders: `variables` and `assets`. To convert such a model: -1. Go to the `/deployment_tools/model_optimizer` directory. +1. Go to the `/tools/model_optimizer` directory. 2. Run the `mo_tf.py` script with a path to the SavedModel directory and a writable output directory: ```sh python3 mo_tf.py --saved_model_dir --output_dir @@ -405,10 +405,8 @@ Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for th The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong. ## Video: Converting a TensorFlow Model -[![](https://img.youtube.com/vi/QW6532LtiTc/0.jpg)](https://www.youtube.com/watch?v=QW6532LtiTc) -\htmlonly + -\endhtmlonly ## Summary In this document, you learned: diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index 26ce1289b8c04e..78acbd694e139c 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -1,39 +1,20 @@ # Converting a Model to Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model} -Use the mo.py script from the `/deployment_tools/model_optimizer` directory to run the Model Optimizer and convert the model to the Intermediate Representation (IR). -The simplest way to convert a model is to run mo.py with a path to the input model file and an output directory where you have write permissions: +Use the mo.py script from the `/tools/model_optimizer` directory to run the Model Optimizer and convert the model to the Intermediate Representation (IR): ```sh python3 mo.py --input_model INPUT_MODEL --output_dir ``` +You need to have have write permissions for an output directory. -> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md). - -The mo.py script is the universal entry point that can deduce the framework that has produced the input model by a standard extension of the model file: - -* `.caffemodel` - Caffe\* models -* `.pb` - TensorFlow\* models -* `.params` - MXNet\* models -* `.onnx` - ONNX\* models -* `.nnet` - Kaldi\* models. - -If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet,paddle}`` option to specify the framework type explicitly. 
- -For example, the following commands are equivalent: -```sh -python3 mo.py --input_model /user/models/model.pb -``` -```sh -python3 mo.py --framework tf --input_model /user/models/model.pb -``` +> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md). To adjust the conversion process, you may use general parameters defined in the [Converting a Model Using General Conversion Parameters](Converting_Model_General.md) and Framework-specific parameters for: -* [Caffe](Convert_Model_From_Caffe.md), -* [TensorFlow](Convert_Model_From_TensorFlow.md), -* [MXNet](Convert_Model_From_MxNet.md), -* [ONNX](Convert_Model_From_ONNX.md), -* [Kaldi](Convert_Model_From_Kaldi.md). -* [Paddle](Convert_Model_From_Paddle.md). +* [Caffe](Convert_Model_From_Caffe.md) +* [TensorFlow](Convert_Model_From_TensorFlow.md) +* [MXNet](Convert_Model_From_MxNet.md) +* [ONNX](Convert_Model_From_ONNX.md) +* [Kaldi](Convert_Model_From_Kaldi.md) ## See Also diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md index 2d267cda3e7172..114bf7a3ce0f68 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md @@ -5,7 +5,7 @@ To simply convert a model trained by any supported framework, run the Model Opti python3 mo.py --input_model INPUT_MODEL --output_dir ``` -The script is in `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/`. The output directory must have write permissions, so you can run mo.py from the output directory or specify an output path with the `--output_dir` option. +The script is in `$INTEL_OPENVINO_DIR/tools/model_optimizer/`. The output directory must have write permissions, so you can run mo.py from the output directory or specify an output path with the `--output_dir` option. > **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For details, refer to [When to Reverse Input Channels](#when_to_reverse_input_channels). @@ -212,8 +212,7 @@ Launch the Model Optimizer for the Caffe bvlc_alexnet model with reversed input python3 mo.py --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 --output_dir ``` -Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto. - file For more information about extensions, please refer to [this](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) page. +Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto file. For more information about extensions, please refer to [this](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) page. 
```sh python3 mo.py --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md index d86368a9f708f5..d0248d149bc7cd 100644 --- a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md @@ -19,7 +19,7 @@ Model Optimizer provides command line options `--input` and `--output` to specif * `--input` option accepts a comma-separated list of layer names of the input model that should be treated as new entry points to the model. * `--output` option accepts a comma-separated list of layer names of the input model that should be treated as new exit points from the model. -The `--input` option is required for cases unrelated to model cutting. For example, when the model contains several inputs and `--input_shape` or `--mean_values` options are used, you should use the `--input` option to specify the order of input nodes for correct mapping between multiple items provided in `--input_shape` and `--mean_values` and the inputs in the model. This is out of scope. +The `--input` option is required for cases unrelated to model cutting. For example, when the model contains several inputs and `--input_shape` or `--mean_values` options are used, you should use the `--input` option to specify the order of input nodes for correct mapping between multiple items provided in `--input_shape` and `--mean_values` and the inputs in the model. Details on these options are out of scope for this document, which focuses on model cutting. Model cutting is illustrated with Inception V1. This model is in `models/research/slim` repository. [This section](Converting_Model.md) describes pre-work to prepare the model for the Model Optimizer to be ready to proceed with this chapter. @@ -39,7 +39,7 @@ In the TensorBoard, it looks the following way together with some predecessors: Convert this model and put the results in a writable output directory: ```sh -${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer +${INTEL_OPENVINO_DIR}/tools/model_optimizer python3 mo.py --input_model inception_v1.pb -b 1 --output_dir ``` (The other examples on this page assume that you first cd to the `model_optimizer` directory and add the `--output_dir` argument with a directory where you have write permissions.) diff --git a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md index fa4bdb50554913..4f9baa1386cb7d 100644 --- a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md +++ b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md @@ -9,7 +9,7 @@ Intermediate Representation (IR) should be specifically formed to be suitable fo Such an IR is called a Low Precision IR and you can generate it in two ways: - [Quantize regular IR with the Post-Training Optimization tool](@ref pot_README) - Use the Model Optimizer for a model pretrained for Low Precision inference: TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations) and ONNX\* quantized models. 
-Both Tensorflow and ONNX quantized models could be prepared by [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md) +Both TensorFlow and ONNX quantized models could be prepared by [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md). For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs. See the [specification of `FakeQuantize` operation](../../../ops/quantization/FakeQuantize_1.md) for details. @@ -17,7 +17,7 @@ See the [specification of `FakeQuantize` operation](../../../ops/quantization/Fa To execute the `Convolution` operation in INT8 on CPU, both data and weight inputs should have `FakeQuantize` as an input operation: ![](../../img/expanded_int8_Convolution_weights.png) -Low pecision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR. +Low precision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR. Plugins with Low Precision Inference support recognize these sub-graphs and quantize them during the inference time. Plugins without Low Precision support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision. diff --git a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md index f0ec23d5a9f631..eb1a7094673e2f 100644 --- a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md +++ b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md @@ -90,6 +90,8 @@ Where the `models/13` string is composed of the following substrings: * `models/`: path to the folder that contains .nd files with pre-trained styles weights * `13`: prefix pointing to 13_decoder, which is the default decoder for the repository +>**NOTE**: If you get an error saying "No module named 'cPickle'", try running the script from this step in Python 2. Then return to Python 3 for the remaining steps. + You can choose any style from [collection of pre-trained weights](https://pan.baidu.com/s/1skMHqYp). (On the Chinese-language page, click the down arrow next to a size in megabytes. Then wait for an overlay box to appear, and click the blue button in it to download.) The `generate()` function generates `nst_vgg19-symbol.json` and `vgg19-symbol.json` files for the specified shape. In the code, it is [1024 x 768] for a 4:3 ratio, and you can specify another, for example, [224,224] for a square ratio. #### 6. 
Run the Model Optimizer to generate an Intermediate Representation (IR): diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md index ffb16eb5f7cc5f..0d130197f74a2c 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md @@ -2,15 +2,19 @@ [F3Net](https://github.com/weijun88/F3Net): Fusion, Feedback and Focus for Salient Object Detection +## Clone the F3Net Model Repository + +To clone the repository, run the following command: +```bash +git clone http://github.com/weijun88/F3Net.git +``` + ## Download and Convert the Model to ONNX* To download the pre-trained model or train the model yourself, refer to the -[instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. Firstly, -convert the model to ONNX\* format. Create and run the script with the following content in the `src` -directory of the model repository: +[instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. First, convert the model to ONNX\* format. Create and run the script with the following content in the `src` directory of the model repository: ```python import torch - from dataset import Config from net import F3Net @@ -19,7 +23,7 @@ net = F3Net(cfg) image = torch.zeros([1, 3, 352, 352]) torch.onnx.export(net, image, 'f3net.onnx', export_params=True, do_constant_folding=True, opset_version=11) ``` -The script generates the ONNX\* model file f3net.onnx. The model conversion was tested with the repository hash commit `eecace3adf1e8946b571a4f4397681252f9dc1b8`. +The script generates the ONNX\* model file `f3net.onnx`. This model conversion was tested with the repository hash commit `eecace3adf1e8946b571a4f4397681252f9dc1b8`. ## Convert ONNX* F3Net Model to IR diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md index a58e886d4f4230..31de647f379158 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md @@ -20,15 +20,15 @@ mkdir rnnt_for_openvino cd rnnt_for_openvino ``` -**Step 3**. Download pretrained weights for PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ. -For UNIX*-like systems you can use wget: +**Step 3**. Download pretrained weights for PyTorch implementation from [https://zenodo.org/record/3662521#.YG21DugzZaQ](https://zenodo.org/record/3662521#.YG21DugzZaQ). +For UNIX*-like systems you can use `wget`: ```bash wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt ``` The link was taken from `setup.sh` in the `speech_recoginitin/rnnt` subfolder. You will get exactly the same weights as -if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt. +if you were following the steps from [https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt](https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt). -**Step 4**. Install required python* packages: +**Step 4**. 
Install required Python packages: ```bash pip3 install torch toml ``` @@ -37,7 +37,7 @@ pip3 install torch toml `export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`: > **NOTE**: If you already have a full clone of MLCommons inference repository, you need to -> specify `mlcommons_inference_path` variable. +> specify the `mlcommons_inference_path` variable. ```python import toml @@ -92,8 +92,7 @@ torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12, python3 export_rnnt_to_onnx.py ``` -After completing this step, the files rnnt_encoder.onnx, rnnt_prediction.onnx, and rnnt_joint.onnx will be saved in -the current directory. +After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx`, and `rnnt_joint.onnx` will be saved in the current directory. **Step 6**. Run the conversion command: @@ -102,6 +101,6 @@ python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input. python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]" python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]" ``` -Please note that hardcoded value for sequence length = 157 was taken from the MLCommons, but conversion to IR preserves -network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change input shapes manually to any value either during conversion or -inference. +Please note that hardcoded value for sequence length = 157 was taken from the MLCommons but conversion to IR preserves +network [reshapeability](../../../../IE_DG/ShapeInference.md), this means you can change input shapes manually to any value either during conversion or +inference. \ No newline at end of file diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md index 9fb7e1ca9e9ce3..50272a33f74d4c 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md @@ -138,7 +138,7 @@ git checkout 57b8f2d95e62e2e649b382f516ab41f949b57239 3. Set up the environment as described in `README.md`. -**Step 2**. Download a pre-trained model from the list attached in the `Evaluation` section of `README.md` document, for example `yolact_base_54_800000.pth`. +**Step 2**. Download a pre-trained model from the list attached in the `Evaluation` section of the [README.md](https://github.com/dbolya/yolact/blob/master/README.md) document, for example `yolact_base_54_800000.pth`. **Step 3**. Export the model to ONNX* format. @@ -187,5 +187,4 @@ python path/to/model_optimizer/mo.py \ --input_model /path/to/yolact.onnx \ --reverse_input_channels \ --scale 255 -``` - +``` \ No newline at end of file diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md index b78ec640cba19c..fe829c1c21cbd3 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md @@ -47,9 +47,9 @@ As a result the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` will To generate the IR of the EfficientDet TensorFlow model, run:
```sh -python3 $INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \ +python3 $INTEL_OPENVINO_DIR/tools/model_optimizer/mo.py \ --input_model savedmodeldir/efficientdet-d4_frozen.pb \ ---transformations_config $INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/extensions/front/tf/automl_efficientdet.json \ +--transformations_config $INTEL_OPENVINO_DIR/tools/model_optimizer/extensions/front/tf/automl_efficientdet.json \ --input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \ --reverse_input_channels ``` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index eabe4840eb885a..076fe4716cc205 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -3,20 +3,18 @@ > **NOTES**: > * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". > * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. -> * To generate IRs for SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` layers instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. +> * To generate IRs for TF 1 SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` operations instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. 
The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. ## How to Convert a Model -With 2018 R3 release, the Model Optimizer introduces a new approach to convert models created using the TensorFlow\* Object Detection API. Compared with the previous approach, the new process produces inference results with higher accuracy and does not require modifying any configuration files and providing intricate command line parameters. - You can download TensorFlow\* Object Detection API models from the TensorFlow 1 Detection Model Zoo or TensorFlow 2 Detection Model Zoo. NOTE: Before converting, make sure you have configured the Model Optimizer. For configuration steps, refer to [Configuring the Model Optimizer](../../Config_Model_Optimizer.md). -To convert a TensorFlow\* Object Detection API model, go to the `/deployment_tools/model_optimizer` directory and run the `mo_tf.py` script with the following required parameters: +To convert a TensorFlow\* Object Detection API model, go to the `/tools/model_optimizer` directory and run the `mo_tf.py` script with the following required parameters: * `--input_model ` --- File with a pre-trained model (binary or text .pb file after freezing) OR `--saved_model_dir ` for the TensorFlow\* 2 models -* `--transformations_config ` --- A subgraph replacement configuration file with transformations description. For the models downloaded from the TensorFlow\* Object Detection API zoo, you can find the configuration files in the `/deployment_tools/model_optimizer/extensions/front/tf` directory. Use: +* `--transformations_config ` --- A subgraph replacement configuration file with transformations description. For the models downloaded from the TensorFlow\* Object Detection API zoo, you can find the configuration files in the `/tools/model_optimizer/extensions/front/tf` directory. 
Use: * `ssd_v2_support.json` --- for frozen SSD topologies from the models zoo version up to 1.13.X inclusively * `ssd_support_api_v.1.14.json` --- for SSD topologies trained using the TensorFlow\* Object Detection API version 1.14 up to 1.14.X inclusively * `ssd_support_api_v.1.15.json` --- for SSD topologies trained using the TensorFlow\* Object Detection API version 1.15 up to 2.0 @@ -54,9 +52,26 @@ Additionally to the mandatory parameters listed above you can use optional conve For example, if you downloaded the [pre-trained SSD InceptionV2 topology](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz) and extracted archive to the directory `/tmp/ssd_inception_v2_coco_2018_01_28`, the sample command line to convert the model looks as follows: ``` -/deployment_tools/model_optimizer/mo_tf.py --input_model=/tmp/ssd_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --transformations_config /deployment_tools/model_optimizer/extensions/front/tf/ssd_v2_support.json --tensorflow_object_detection_api_pipeline_config /tmp/ssd_inception_v2_coco_2018_01_28/pipeline.config --reverse_input_channels +/tools/model_optimizer/mo_tf.py --input_model=/tmp/ssd_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --transformations_config /tools/model_optimizer/extensions/front/tf/ssd_v2_support.json --tensorflow_object_detection_api_pipeline_config /tmp/ssd_inception_v2_coco_2018_01_28/pipeline.config --reverse_input_channels ``` +## Important Notes About Feeding Input Images to the Samples + +Inference Engine comes with a number of samples to infer Object Detection API models including: + +* [Object Detection for SSD Sample](../../../../../inference-engine/samples/object_detection_sample_ssd/README.md) --- for RFCN, SSD and Faster R-CNNs +* [Mask R-CNN Sample for TensorFlow* Object Detection API Models](@ref omz_demos_mask_rcnn_demo_cpp) --- for Mask R-CNNs + +There are several important notes about feeding input images to the samples: + +1. Inference Engine samples stretch input image to the size of the input operation without preserving aspect ratio. This behavior is usually correct for most topologies (including SSDs), but incorrect for other models like Faster R-CNN, Mask R-CNN and R-FCN. These models usually use keeps aspect ratio resizer. The type of pre-processing is defined in the pipeline configuration file in the section `image_resizer`. If keeping aspect ratio is used, then it is necessary to resize image before passing it to the sample and optionally pad the resized image with 0s (if the attribute "pad_to_max_dimension" in the pipeline.config is equal to "true"). + +2. TensorFlow\* implementation of image resize may be different from the one implemented in the sample. Even reading input image from compressed format (like `.jpg`) could give different results in the sample and TensorFlow\*. So, if it is necessary to compare accuracy between the TensorFlow\* and the Inference Engine it is recommended to pass pre-resized input image in a non-compressed format (like `.bmp`). + +3. If you want to infer the model with the Inference Engine samples, convert the model specifying the `--reverse_input_channels` command line parameter. The samples load images in BGR channels order, while TensorFlow* models were trained with images in RGB order. 
When the `--reverse_input_channels` command line parameter is specified, the Model Optimizer modifies the weights of the first convolution or another channel-dependent operation so that the output is the same as if the image were passed in the RGB channel order. + +4. Read carefully the messages printed by the Model Optimizer during model conversion. They contain important instructions on how to prepare the input data before running the inference and how to interpret the output. + ## Custom Input Shape Model Optimizer handles the command line parameter `--input_shape` for TensorFlow\* Object Detection API models in a special way depending on the image resizer type defined in the `pipeline.config` file. TensorFlow\* Object Detection API generates different `Preprocessor` sub-graph based on the image resizer type. Model Optimizer supports two types of image resizer: * `fixed_shape_resizer` --- *Stretches* input image to the specific height and width. The `pipeline.config` snippet below shows a `fixed_shape_resizer` sample definition: @@ -77,19 +92,20 @@ image_resizer { } } ``` +If the additional parameter "pad_to_max_dimension" is equal to "true", the resized image is padded with 0s to a square image of size "max_dimension". ### Fixed Shape Resizer Replacement -* If the `--input_shape` command line parameter is not specified, the Model Optimizer generates an input layer with the height and width as defined in the `pipeline.config`. +* If the `--input_shape` command line parameter is not specified, the Model Optimizer generates an input operation with the height and width as defined in the `pipeline.config`. -* If the `--input_shape [1, H, W, 3]` command line parameter is specified, the Model Optimizer sets the input layer height to `H` and width to `W` and convert the model. However, the conversion may fail because of the following reasons: - * The model is not reshape-able, meaning that it's not possible to change the size of the model input image. For example, SSD FPN models have `Reshape` operations with hard-coded output shapes, but the input size to these `Reshape` instances depends on the input image size. In this case, the Model Optimizer shows an error during the shape inference phase. Run the Model Optimizer with `--log_level DEBUG` to see the inferred layers output shapes to see the mismatch. +* If the `--input_shape [1, H, W, 3]` command line parameter is specified, the Model Optimizer sets the input operation height to `H` and width to `W` and converts the model. However, the conversion may fail for the following reasons: + * The model is not reshape-able, meaning that it's not possible to change the size of the model input image. For example, SSD FPN models have `Reshape` operations with hard-coded output shapes, but the input size to these `Reshape` instances depends on the input image size. In this case, the Model Optimizer shows an error during the shape inference phase. Run the Model Optimizer with `--log_level DEBUG` to see the inferred operations output shapes and find the mismatch. * Custom input shape is too small. For example, if you specify `--input_shape [1,100,100,3]` to convert a SSD Inception V2 model, one of convolution or pooling nodes decreases input tensor spatial dimensions to non-positive values. In this case, the Model Optimizer shows error message like this: '[ ERROR ] Shape [ 1 -1 -1 256] is not fully defined for output X of "node_name".'
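+For models that keep the aspect ratio (see the next subsection and the Important Notes above), the input image has to be pre-processed before it is passed to a sample. A minimal sketch of such pre-processing is shown below; it assumes OpenCV and NumPy are available and a 3-channel input image, and the function name and values are illustrative only, not part of the Model Optimizer or the Inference Engine samples:
+
+```python
+import cv2
+import numpy as np
+
+
+def preprocess_keep_aspect_ratio(image: np.ndarray, min_dimension: int, max_dimension: int,
+                                 pad_to_max_dimension: bool = False) -> np.ndarray:
+    # Illustrative sketch (not part of OpenVINO): resize a 3-channel image the same way
+    # the keep_aspect_ratio_resizer does and optionally pad it with zeros to a square.
+    h, w = image.shape[:2]
+    # Scale the shorter side towards min_dimension without letting the longer side
+    # exceed max_dimension (same rule as calculate_shape_keeping_aspect_ratio below).
+    ratio = min(min_dimension / min(h, w), max_dimension / max(h, w))
+    new_h, new_w = int(round(h * ratio)), int(round(w * ratio))
+    resized = cv2.resize(image, (new_w, new_h))
+    if not pad_to_max_dimension:
+        return resized
+    # Mirror the "pad_to_max_dimension" option from pipeline.config: pad the resized
+    # image with 0s to a square of size max_dimension x max_dimension.
+    padded = np.zeros((max_dimension, max_dimension, image.shape[2]), dtype=image.dtype)
+    padded[:new_h, :new_w] = resized
+    return padded
+
+
+# Usage example (paths and values are illustrative). cv2.imread returns the image in the
+# BGR channel order, which matches the samples when the model was converted with
+# --reverse_input_channels.
+# image = cv2.imread('/tmp/input.bmp')
+# blob = preprocess_keep_aspect_ratio(image, min_dimension=600, max_dimension=1024,
+#                                     pad_to_max_dimension=True)
+```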
### Keep Aspect Ratio Resizer Replacement -* If the `--input_shape` command line parameter is not specified, the Model Optimizer generates an input layer with both height and width equal to the value of parameter `min_dimension` in the `keep_aspect_ratio_resizer`. +* If the `--input_shape` command line parameter is not specified, the Model Optimizer generates an input operation with both height and width equal to the value of parameter `min_dimension` in the `keep_aspect_ratio_resizer`. -* If the `--input_shape [1, H, W, 3]` command line parameter is specified, the Model Optimizer scales the specified input image height `H` and width `W` to satisfy the `min_dimension` and `max_dimension` constraints defined in the `keep_aspect_ratio_resizer`. The following function calculates the input layer height and width: +* If the `--input_shape [1, H, W, 3]` command line parameter is specified, the Model Optimizer scales the specified input image height `H` and width `W` to satisfy the `min_dimension` and `max_dimension` constraints defined in the `keep_aspect_ratio_resizer`. The following function calculates the input operation height and width: ```python def calculate_shape_keeping_aspect_ratio(H: int, W: int, min_dimension: int, max_dimension: int): @@ -98,955 +114,18 @@ def calculate_shape_keeping_aspect_ratio(H: int, W: int, min_dimension: int, max ratio = min(ratio_min, ratio_max) return int(round(H * ratio)), int(round(W * ratio)) ``` +The `--input_shape` command line parameter should be specified only if the "pad_to_max_dimension" does not exist or is set to "false" in the `keep_aspect_ratio_resizer`. -Models with `keep_aspect_ratio_resizer` were trained to recognize object in real aspect ratio, in contrast with most of the classification topologies trained to recognize objects stretched vertically and horizontally as well. By default, the Model Optimizer converts topologies with `keep_aspect_ratio_resizer` to consume a square input image. If the non-square image is provided as input, it is stretched without keeping aspect ratio that results to objects detection quality decrease. +Models with `keep_aspect_ratio_resizer` were trained to recognize objects in their real aspect ratio, in contrast with most of the classification topologies, which are trained to recognize objects stretched vertically and horizontally as well. By default, the Model Optimizer converts topologies with `keep_aspect_ratio_resizer` to consume a square input image. If a non-square image is provided as input, it is stretched without keeping the aspect ratio, which results in a decrease of object detection quality. > **NOTE**: It is highly recommended specifying the `--input_shape` command line parameter for the models with `keep_aspect_ratio_resizer` if the input image dimensions are known in advance. -## Important Notes About Feeding Input Images to the Samples - -Inference Engine comes with a number of samples that use Object Detection API models including: - -* [Object Detection for SSD Sample](../../../../../inference-engine/samples/object_detection_sample_ssd/README.md) --- for RFCN, SSD and Faster R-CNNs -* [Mask R-CNN Sample for TensorFlow* Object Detection API Models](@ref omz_demos_mask_rcnn_demo_cpp) --- for Mask R-CNNs - -There are a number of important notes about feeding input images to the samples: - -1. Inference Engine samples stretch input image to the size of the input layer without preserving aspect ratio.
This behavior is usually correct for most topologies (including SSDs), but incorrect for the following Faster R-CNN topologies: Inception ResNet, Inception V2, ResNet50 and ResNet101. Images pre-processing for these topologies keeps aspect ratio. Also all Mask R-CNN and R-FCN topologies require keeping aspect ratio. The type of pre-processing is defined in the pipeline configuration file in the section `image_resizer`. If keeping aspect ratio is required, then it is necessary to resize image before passing it to the sample. - -2. TensorFlow\* implementation of image resize may be different from the one implemented in the sample. Even reading input image from compressed format (like `.jpg`) could give different results in the sample and TensorFlow\*. So, if it is necessary to compare accuracy between the TensorFlow\* and the Inference Engine it is recommended to pass pre-scaled input image in a non-compressed format (like `.bmp`). - -3. If you want to infer the model with the Inference Engine samples, convert the model specifying the `--reverse_input_channels` command line parameter. The samples load images in BGR channels order, while TensorFlow* models were trained with images in RGB order. When the `--reverse_input_channels` command line parameter is specified, the Model Optimizer performs first convolution or other channel dependent operation weights modification so the output will be like the image is passed with RGB channels order. - - ## Detailed Explanations of Model Conversion Process This section is intended for users who want to understand how the Model Optimizer performs Object Detection API models conversion in details. The knowledge given in this section is also useful for users having complex models that are not converted with the Model Optimizer out of the box. It is highly recommended to read [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) chapter first to understand sub-graph replacement concepts which are used here. -Implementation of the sub-graph replacers for Object Detection API models is located in the file `/deployment_tools/model_optimizer/extensions/front/tf/ObjectDetectionAPI.py`. - It is also important to open the model in the [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard) to see the topology structure. Model Optimizer can create an event file that can be then fed to the TensorBoard* tool. Run the Model Optimizer with providing two command line parameters: * `--input_model ` --- Path to the frozen model * `--tensorboard_logdir` --- Path to the directory where TensorBoard looks for the event files. -### SSD (Single Shot Multibox Detector) Topologies - -The SSD topologies are the simplest ones among Object Detection API topologies, so they will be analyzed first. The sub-graph replacement configuration file `ssd_v2_support.json`, which should be used to convert these models, contains three sub-graph replacements: `ObjectDetectionAPIPreprocessorReplacement`, `ObjectDetectionAPISSDPostprocessorReplacement` and `ObjectDetectionAPIOutputReplacement`. Their implementation is described below. - -#### Preprocessor Block - -All Object Detection API topologies contain `Preprocessor` block of nodes (aka ["scope"](https://www.tensorflow.org/guide/graph_viz)) that performs two tasks: - -* Scales image to the size required by the topology. -* Applies mean and scale values if needed. 
- -Model Optimizer cannot convert the part of the `Preprocessor` block performing scaling because the TensorFlow implementation uses `while`- loops which the Inference Engine does not support. Another reason is that the Inference Engine samples scale input images to the size of the input layer from the Intermediate Representation (IR) automatically. Given that it is necessary to cut-off the scaling part of the `Preprocessor` block and leave only operations applying mean and scale values. This task is solved using the Model Optimizer [sub-graph replacer mechanism](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md). - -The `Preprocessor` block has two outputs: the tensor with pre-processed image(s) data and a tensor with pre-processed image(s) size(s). While converting the model, Model Optimizer keeps only the nodes producing the first tensor. The second tensor is a constant which can be obtained from the `pipeline.config` file to be used in other replacers. - -The implementation of the `Preprocessor` block sub-graph replacer is the following (file `/deployment_tools/model_optimizer/extensions/front/tf/ObjectDetectionAPI.py`): - -```python -class ObjectDetectionAPIPreprocessorReplacement(FrontReplacementFromConfigFileSubGraph): - """ - The class replaces the "Preprocessor" block resizing input image and applying mean/scale values. Only nodes related - to applying mean/scaling values are kept. - """ - replacement_id = 'ObjectDetectionAPIPreprocessorReplacement' - - def run_before(self): - return [Pack, Sub] - - def nodes_to_remove(self, graph: Graph, match: SubgraphMatch): - new_nodes_to_remove = match.matched_nodes_names() - # do not remove nodes that perform input image scaling and mean value subtraction - for node_to_keep in ('Preprocessor/sub', 'Preprocessor/sub/y', 'Preprocessor/mul', 'Preprocessor/mul/x'): - if node_to_keep in new_nodes_to_remove: - new_nodes_to_remove.remove(node_to_keep) - return new_nodes_to_remove - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - layout = graph.graph['layout'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - - sub_node = match.output_node(0)[0] - if not sub_node.has('op') or sub_node.op != 'Sub': - raise Error('The output op of the Preprocessor sub-graph is not of type "Sub". Looks like the topology is ' - 'not created with TensorFlow Object Detection API.') - - mul_node = None - if sub_node.in_node(0).has('op') and sub_node.in_node(0).op == 'Mul': - log.info('There is image scaling node in the Preprocessor block.') - mul_node = sub_node.in_node(0) - - initial_input_node_name = 'image_tensor' - if initial_input_node_name not in graph.nodes(): - raise Error('Input node "{}" of the graph is not found. 
Do not run the Model Optimizer with ' - '"--input" command line parameter.'.format(initial_input_node_name)) - placeholder_node = Node(graph, initial_input_node_name) - - # set default value of the batch size to 1 if user didn't specify batch size and input shape - batch_dim = get_batch_dim(layout, 4) - if argv.batch is None and placeholder_node.shape[batch_dim] == -1: - placeholder_node.shape[batch_dim] = 1 - if placeholder_node.shape[batch_dim] > 1: - print("[ WARNING ] The batch size more than 1 is supported for SSD topologies only.") - height, width = calculate_placeholder_spatial_shape(graph, match, pipeline_config) - placeholder_node.shape[get_height_dim(layout, 4)] = height - placeholder_node.shape[get_width_dim(layout, 4)] = width - - # save the pre-processed image spatial sizes to be used in the other replacers - graph.graph['preprocessed_image_height'] = placeholder_node.shape[get_height_dim(layout, 4)] - graph.graph['preprocessed_image_width'] = placeholder_node.shape[get_width_dim(layout, 4)] - - to_float_node = placeholder_node.out_node(0) - if not to_float_node.has('op') or to_float_node.op != 'Cast': - raise Error('The output of the node "{}" is not Cast operation. Cannot apply replacer.'.format( - initial_input_node_name)) - - # connect to_float_node directly with node performing scale on mean value subtraction - if mul_node is None: - create_edge(to_float_node, sub_node, 0, 0) - else: - create_edge(to_float_node, mul_node, 0, 1) - - print('The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if' - ' applicable) are kept.') - return {} -``` -The `run_before` function defines a list of replacers which current replacer should be run before. In this case it is `Pack` and `Sub`. The `Sub` operation is not supported by Inference Engine plugins so Model Optimizer replaces it with a combination of the `Eltwise` layer (element-wise sum) and the `ScaleShift` layer. But the `Preprocessor` replacer expects to see `Sub` node, so it should be called before the `Sub` is replaced. - -The `nodes_to_remove` function returns list of nodes that should be removed after the replacement happens. In this case it removes all nodes matched in the `Preprocessor` scope except the `Sub` and `Mul` nodes performing mean value subtraction and scaling. - -The `generate_sub_graph` function performs the following actions: - -* Lines 20-24: Reads the `pipeline.config` configuration file to get the model hyper-parameters and other attributes. -* Lines 25-29: Checks that the output node of the `Preprocessor` scope is of type `Sub`. -* Lines 31-34: Checks that the input of the `Sub` node is of type `Mul`. This information is needed to correctly connect the input node of the topology later. -* Lines 36-50: Finds the topology input (placeholder) node and sets its weight and height according to the image resizer defined in the `pipeline.config` file and the `--input_shape` provided by the user. The batch size is set to 1 by default, but it will be overridden if you specify a batch size using command-line option `-b`. Refer to the [Custom Input Shape](#tf_od_custom_input_shape) on how the Model Optimizer calculates input layer height and width. -* Lines 52-54: Saves the placeholder shape in the `graph` object for other sub-graph replacements. -* Lines 56-59: Checks that the placeholder node follows the 'Cast' node which converts model input data from UINT8 to FP32. 
-* Lines 61-65: Creates edge from the placeholder node to the `Mul` (if present) or `Sub` node to a correct input port (0 for `Sub` and 1 for `Mul`). -* Line 69: The replacer returns a dictionary with nodes mapping that is used by other sub-graph replacement functions. In this case, it is not needed, so the empty dictionary is returned. - -#### Postprocessor Block - -A distinct feature of any SSD topology is a part performing non-maximum suppression of proposed images bounding boxes. This part of the topology is implemented with dozens of primitive operations in TensorFlow, while in Inference Engine, it is one [layer](../../../../ops/opset.md) called `DetectionOutput`. Thus, to convert a SSD model from the TensorFlow, the Model Optimizer should replace the entire sub-graph of operations that implement the `DetectionOutput` layer with a single `DetectionOutput` node. - -The Inference Engine `DetectionOutput` layer implementation consumes three tensors in the following order: - -1. Tensor with locations of bounding boxes -2. Tensor with confidences for each bounding box -3. Tensor with prior boxes ("anchors" in a TensorFlow terminology) - -The Inference Engine `DetectionOutput` layer implementation produces one tensor with seven numbers for each actual detection: - -* batch index -* class label -* class probability -* x_1 box coordinate -* y_1 box coordinate -* x_2 box coordinate -* y_2 box coordinate. - -There are more output tensors in the TensorFlow Object Detection API: "detection_boxes", "detection_classes", "detection_scores" and "num_detections", but the values in them are consistent with the output values of the Inference Engine DetectionOutput layer. - -The sub-graph replacement by points is used in the `ssd_v2_support.json` to match the `Postprocessor` block. The start points are defined the following way: - -* "Postprocessor/Shape" receives tensor with bounding boxes; -* "Postprocessor/scale_logits" receives tensor with confidences(probabilities) for each box; -* "Postprocessor/Tile" receives tensor with prior boxes (anchors); -* "Postprocessor/Reshape_1" is specified only to match the whole `Postprocessor` scope. Not used in the replacement code; -* "Postprocessor/ToFloat" is specified only to match the whole `Postprocessor` scope. Not used in the replacement code. - -There are a number of differences in layout, format and content of in input tensors to `DetectionOutput` layer and what tensors generates TensorFlow, so additional tensors processing before creating `DetectionOutput` layer is required. It is described below. 
The sub-graph replacement class for the `DetectionOutput` layer is given below: - -```python -class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFileSubGraph): - replacement_id = 'ObjectDetectionAPISSDPostprocessorReplacement' - - def run_after(self): - return [ObjectDetectionAPIPreprocessorReplacement] - - def run_before(self): - # the replacer uses node of type "RealDiv" as one of the start points, but Model Optimizer replaces nodes of - # type "RealDiv" with a new ones, so it is necessary to replace the sub-graph before replacing the "RealDiv" - # nodes - return [Div, StandaloneConstEraser] - - def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict): - # the DetectionOutput in IE produces single tensor, but in TF it produces two tensors, so create only one output - # edge match - return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id} - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - num_classes = _value_or_raise(match, pipeline_config, 'num_classes') - - # reshapes confidences to 4D before applying activation function - expand_dims_op = Reshape(graph, {'dim': int64_array([0, 1, -1, num_classes + 1])}) - # do not convert from NHWC to NCHW this node shape - expand_dims_node = expand_dims_op.create_node([match.input_nodes(1)[0][0].in_node(0)], - dict(name='do_ExpandDims_conf')) - - activation_function = _value_or_raise(match, pipeline_config, 'postprocessing_score_converter') - activation_conf_node = add_activation_function_after_node(graph, expand_dims_node, activation_function) - PermuteAttrs.set_permutation(expand_dims_node, expand_dims_node.out_node(), None) - - # IE DetectionOutput layer consumes flattened tensors - # reshape operation to flatten locations tensor - reshape_loc_op = Reshape(graph, {'dim': int64_array([0, -1])}) - reshape_loc_node = reshape_loc_op.create_node([match.input_nodes(0)[0][0].in_node(0)], - dict(name='do_reshape_loc')) - - # IE DetectionOutput layer consumes flattened tensors - # reshape operation to flatten confidence tensor - reshape_conf_op = Reshape(graph, {'dim': int64_array([0, -1])}) - reshape_conf_node = reshape_conf_op.create_node([activation_conf_node], dict(name='do_reshape_conf')) - - if pipeline_config.get_param('ssd_anchor_generator_num_layers') is not None or \ - pipeline_config.get_param('multiscale_anchor_generator_min_level') is not None: - # change the Reshape operations with hardcoded number of output elements of the convolution nodes to be - # reshapable - _relax_reshape_nodes(graph, pipeline_config) - - # create PriorBoxClustered nodes instead of a constant value with prior boxes so the model could be reshaped - if pipeline_config.get_param('ssd_anchor_generator_num_layers') is not None: - priors_node = _create_prior_boxes_node(graph, pipeline_config) - elif pipeline_config.get_param('multiscale_anchor_generator_min_level') is not None: - priors_node = _create_multiscale_prior_boxes_node(graph, pipeline_config) - else: - log.info('The anchor generator is not known. 
Save constant with prior-boxes to IR.') - priors_node = match.input_nodes(2)[0][0].in_node(0) - - # creates DetectionOutput Node object from Op class - detection_output_op = DetectionOutput(graph, match.custom_replacement_desc.custom_attributes) - detection_output_op.attrs['old_infer'] = detection_output_op.attrs['infer'] - detection_output_op.attrs['infer'] = __class__.do_infer - detection_output_node = detection_output_op.create_node( - [reshape_loc_node, reshape_conf_node, priors_node], - dict(name=detection_output_op.attrs['type'], - clip=1, - confidence_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_score_threshold'), - top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_detections_per_class'), - keep_top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_total_detections'), - nms_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_iou_threshold'))) - - return {'detection_output_node': detection_output_node} -``` - -The `run_before` and `run_after` functions define lists of replacers that this replacer should be run before and after respectively. - -The `input_edges_match` and `output_edges_match` functions generate dictionaries describing how the input/output nodes matched with the replacer should be connected with new nodes generated in the `generate_sub_graph` function. Refer to [sub-graph replacements](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) documentation for more information. - -The `generate_sub_graph` function performs the following actions: - -* Lines 19-23: Reads the `pipeline.config` configuration file to get the model hyper-parameters and other attributes. -* Lines 25-32: Makes tensor with confidences 4D and apply correct activation function (read from the `pipeline.config` file) to it. -* Line 33: Disables permutation of `expand_dims_node`'s attributes because they are already in the NCHW layout. -* Lines 35-39: Makes tensor with bounding boxes 2D, where the first dimension corresponds to a batch size. -* Lines 49-52: Makes tensor with confidences 2D, where the first dimension corresponds to a batch size. -* Lines 41-44: Creates a node with `DetectionOutput` layer with a number of layer attributes from the `pipeline.config` file. Also the inference function (`infer` attribute) is updated with a custom inference function `__class__.do_infer`. The latter change is described below. -* Lines 46-59: Creates several `PriorBoxClustered` layers which generate prior boxes depending on the type of the grid anchor generator defined in the `pipeline.config` file. If the grid anchor type is not known then initialize `priors_node` as a node matched by the sub-graph replacement. In the latter case it is a constant node with prior boxes calculated for a particular input image shape. -* Lines 61-72: Creates `DetectionOutput` layer with attributes from the `pipeline.config` file. -* Line 74: Returns dictionary with mapping of nodes that is used in the `input_edges_match` and `output_edges_match` functions. - -The paragraphs below explains why the inference function for the Detection Output layer is modified. Before doing that it is necessary to make acquaintance with selected high-level steps of the Model Optimize model conversion pipeline. Note, that only selected steps are required for understanding the change are mentioned: - -1. Model Optimizer creates calculation graph from the initial topology where each nodes corresponds to a operation from the initial model. -2. 
Model Optimizer performs "Front replacers" (including the one being described now). -3. Model Optimizer adds data nodes between operation nodes to the graph. -4. Model Optimizer performs "Middle replacers". -5. Model Optimizer performs "shape inference" phase. During this phase the shape of all data nodes is being calculated. Model Optimizer also calculates value for data tensors which are constant, i.e. do not depend on input. For example, tensor with prior boxes (generated with `MultipleGridAnchorGenerator` or similar scopes) doesn't depend on input and is evaluated by Model Optimizer during shape inference. Model Optimizer uses inference function stored in the 'infer' attribute of operation nodes. -6. Model Optimizer performs "Back replacers". -7. Model Optimizer generates IR. - -The `do_infer` function is needed to perform some adjustments to the tensor with prior boxes (anchors) that is known only after the shape inference phase and to perform additional transformations described below. This change is performed only if the tensor with prior boxes is not constant (so it is produced by `PriorBoxClustered` layers during inference). It is possible to make the `Postprocessor` block replacement as a Middle replacer (so the prior boxes tensor would be evaluated by the time the replacer is called), but in this case it will be necessary to correctly handle data nodes which are created between each pair of initially adjacent operation nodes. In order to inject required modification to the inference function of the `DetectionOutput` node, a new function is created to perform modifications and to call the initial inference function. The code of a new inference function is the following: - -```python -@staticmethod -def do_infer(node: Node): - prior_boxes = node.in_node(2).value - if prior_boxes is not None: - argv = node.graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - variance = _variance_from_pipeline_config(pipeline_config) - # replicating the variance values for all prior-boxes - variances = np.tile(variance, [prior_boxes.shape[-2], 1]) - # DetectionOutput Inference Engine expects the prior-boxes in the following layout: (values, variances) - prior_boxes = prior_boxes.reshape([-1, 4]) - prior_boxes = np.concatenate((prior_boxes, variances), 0) - # compared to the IE's DetectionOutput, the TF keeps the prior-boxes in YXYX, need to get back to the XYXY - prior_boxes = np.concatenate((prior_boxes[:, 1:2], prior_boxes[:, 0:1], - prior_boxes[:, 3:4], prior_boxes[:, 2:3]), 1) - # adding another dimensions, as the prior-boxes are expected as 3d tensors - prior_boxes = prior_boxes.reshape((1, 2, -1)) - node.in_node(2).shape = int64_array(prior_boxes.shape) - node.in_node(2).value = prior_boxes - - node.old_infer(node) - # compared to the IE's DetectionOutput, the TF keeps the locations in YXYX, need to get back to the XYXY - # for last convolutions that operate the locations need to swap the X and Y for output feature weights & biases - conv_nodes = backward_bfs_for_operation(node.in_node(0), ['Conv2D']) - swap_weights_xy(conv_nodes) - squeeze_reshape_and_concat(conv_nodes) - - for node_name in node.graph.nodes(): - node = Node(node.graph, node_name) - if node.has_and_set('swap_xy_count') and len(node.out_nodes()) != node['swap_xy_count']: - raise Error('The weights were swapped for node "{}", but this weight was used in other 
nodes.'.format( - node.name)) -``` - -* Lines 3-18: Updates the value of the tensor with prior boxes by appending variance values if the prior boxes are pre-calculated. Inference Engine implementation of the `DetectionOutput` layer expects these values located within the tensor with bounding boxes, but in TensorFlow they are applied in different way. -* Line 23: Executes initial inference function to calculate the output shape of this node. -* Lines 26-27: Finds predecessor node of type "Conv2D" of the node with bounding boxes (which is `node.in_node(0)`) and modifies convolution weights so "X" and "Y" coordinates are swapped. In TensorFlow bounding boxes are stored in the tensors in "YXYX" order, while in the Inference Engine it is "XYXY". -* Line 28: Executes function looking for `Reshape` operations after the `Conv2D` nodes found above with 4D output and remove the dimension with index 2 which should be equal to 1. This is a workaround to make tensor 3D so its shape will not be transposed during the IR generation. The problem arises when bounding boxes predictions are reshaped from [1, 1, 1, X] to [1, X / 4, 1, 4]. The result tensor should not be transposed because after transpose it will have shape [1, 4, X / 4, 1] and the concatenation over dimension with index 2 will produce incorrect tensor. Also the function looks for `Concat` operations and changes the concatenation dimension from 2 to 1. - -### Faster R-CNN Topologies -The Faster R-CNN models contain several building blocks similar to building blocks from SSD models so it is highly recommended to read the section about converting them first. Detailed information about Faster R-CNN topologies is provided [in the abstract](https://arxiv.org/abs/1506.01497). - -#### Preprocessor Block -Faster R-CNN topologies contain similar `Preprocessor` block as SSD topologies. The same `ObjectDetectionAPIPreprocessorReplacement` sub-graph replacer is used to cut it off. - -#### Proposal Layer -The `Proposal` layer is implemented with dozens of primitive operations in TensorFlow, meanwhile, it is a single layer in the Inference Engine. The `ObjectDetectionAPIProposalReplacement` sub-graph replacer identifies nodes corresponding to the layer and replaces them with required new nodes. - -```python -class ObjectDetectionAPIProposalReplacement(FrontReplacementFromConfigFileSubGraph): - """ - This class replaces sub-graph of operations with Proposal layer and additional layers transforming - tensors from layout of TensorFlow to layout required by Inference Engine. - Refer to comments inside the function for more information about performed actions. 
- """ - replacement_id = 'ObjectDetectionAPIProposalReplacement' - - def run_after(self): - return [ObjectDetectionAPIPreprocessorReplacement] - - def run_before(self): - return [Sub, CropAndResizeReplacement] - - def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict): - return {match.output_node(0)[0].id: new_sub_graph['proposal_node'].id} - - def nodes_to_remove(self, graph: Graph, match: SubgraphMatch): - new_list = match.matched_nodes_names().copy() - # do not remove nodes that produce box predictions and class predictions - new_list.remove(match.single_input_node(0)[0].id) - new_list.remove(match.single_input_node(1)[0].id) - return new_list - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - - max_proposals = _value_or_raise(match, pipeline_config, 'first_stage_max_proposals') - proposal_ratios = _value_or_raise(match, pipeline_config, 'anchor_generator_aspect_ratios') - proposal_scales = _value_or_raise(match, pipeline_config, 'anchor_generator_scales') - anchors_count = len(proposal_ratios) * len(proposal_scales) - - # Convolution/matmul node that produces classes predictions - # Permute result of the tensor with classes permissions so it will be in a correct layout for Softmax - predictions_node = backward_bfs_for_operation(match.single_input_node(1)[0], ['Add'])[0] - - reshape_classes_op = Reshape(graph, dict(dim=int64_array([0, anchors_count, 2, -1]))) - reshape_classes_node = reshape_classes_op.create_node([], dict(name='predictions/Reshape', nchw_layout=True)) - predictions_node.insert_node_after(reshape_classes_node, 0) - - softmax_conf_op = Softmax(graph, dict(axis=2, nchw_layout=True, name=reshape_classes_node.id + '/Softmax')) - softmax_conf_node = softmax_conf_op.create_node([reshape_classes_node]) - permute_reshape_softmax_op = Permute(graph, dict(order=int64_array([0, 2, 1, 3]), nchw_layout=True)) - permute_reshape_softmax_node = permute_reshape_softmax_op.create_node([softmax_conf_node], dict( - name=softmax_conf_node.name + '/Permute')) - - initial_shape_op = Shape(graph, dict(name=predictions_node.id + '/Shape')) - initial_shape_node = initial_shape_op.create_node([predictions_node]) - - # implement custom reshape infer function because we need to know the input convolution node output dimension - # sizes but we can know it only after partial infer - reshape_permute_op = Reshape(graph, dict()) - reshape_permute_node = reshape_permute_op.create_node([permute_reshape_softmax_node, initial_shape_node], - dict(name='Reshape_Permute_Class')) - - variance_height = pipeline_config.get_param('frcnn_variance_height') - variance_width = pipeline_config.get_param('frcnn_variance_width') - variance_x = pipeline_config.get_param('frcnn_variance_x') - variance_y = pipeline_config.get_param('frcnn_variance_y') - anchor_generator_height_stride = pipeline_config.get_param('anchor_generator_height_stride') - anchor_generator_width_stride = pipeline_config.get_param('anchor_generator_width_stride') - anchor_generator_height = pipeline_config.get_param('anchor_generator_height') - anchor_generator_width = pipeline_config.get_param('anchor_generator_width') - - if variance_height != variance_width: - log.error('The values for variance for height "{}" is not equal to variance for width "{}". 
The detection ' - 'results will be inaccurate.'.format(variance_height, variance_width)) - if variance_x != variance_y: - log.error('The values for variance for x "{}" is not equal to variance for y "{}". The detection ' - 'results will be inaccurate.'.format(variance_x, variance_y)) - if anchor_generator_height_stride != anchor_generator_width_stride: - log.error('The values for the anchor generator height stride "{}" is not equal to the anchor generator ' - 'width stride "{}". The detection results will be inaccurate.'.format( - anchor_generator_height_stride, anchor_generator_width_stride)) - if anchor_generator_height != anchor_generator_width: - log.error('The values for the anchor generator height "{}" is not equal to the anchor generator width ' - 'stride "{}". The detection results will be inaccurate.'.format(anchor_generator_height, - anchor_generator_width)) - - proposal_op = ProposalOp(graph, dict(min_size=1, - framework='tensorflow', - pre_nms_topn=2 ** 31 - 1, - box_size_scale=variance_height, - box_coordinate_scale=variance_x, - post_nms_topn=max_proposals, - feat_stride=anchor_generator_height_stride, - ratio=proposal_ratios, - scale=proposal_scales, - normalize=1, - base_size=anchor_generator_height, - nms_thresh=_value_or_raise(match, pipeline_config, - 'first_stage_nms_iou_threshold'))) - for key in ('clip_before_nms', 'clip_after_nms'): - if key in match.custom_replacement_desc.custom_attributes: - proposal_op.attrs[key] = int(match.custom_replacement_desc.custom_attributes[key]) - - anchors_node = backward_bfs_for_operation(match.single_input_node(0)[0], ['Add'])[0] - - # creates input to store input image height, width and scales (usually 1.0s) - # the batch size for this input is fixed because it is allowed to pass images of the same size only as input - input_op_with_image_size = Input(graph, dict(shape=int64_array([1, 3]), fixed_batch=True)) - input_with_image_size_node = input_op_with_image_size.create_node([], dict(name='image_info')) - - proposal_node = proposal_op.create_node([reshape_permute_node, anchors_node, input_with_image_size_node], - dict(name='proposals')) - - if 'do_not_swap_proposals' in match.custom_replacement_desc.custom_attributes and \ - match.custom_replacement_desc.custom_attributes['do_not_swap_proposals']: - swapped_proposals_node = proposal_node - else: - swapped_proposals_node = add_convolution_to_swap_xy_coordinates(graph, proposal_node, 5) - - proposal_reshape_2d_op = Reshape(graph, dict(dim=int64_array([-1, 5]), nchw_layout=True)) - proposal_reshape_2d_node = proposal_reshape_2d_op.create_node([swapped_proposals_node], - dict(name="reshape_swap_proposals_2d")) - - # feed the CropAndResize node with a correct boxes information produced with the Proposal layer - # find the first CropAndResize node in the BFS order - crop_and_resize_nodes_ids = [node_id for node_id in bfs_search(graph, [match.single_input_node(0)[0].id]) if - graph.node[node_id]['op'] == 'CropAndResize'] - assert len(crop_and_resize_nodes_ids) != 0, "Didn't find any CropAndResize nodes in the graph." 
- if 'do_not_swap_proposals' not in match.custom_replacement_desc.custom_attributes or not \ - match.custom_replacement_desc.custom_attributes['do_not_swap_proposals']: - crop_and_resize_node = Node(graph, crop_and_resize_nodes_ids[0]) - # set a marker that the input with box coordinates has been pre-processed so the CropAndResizeReplacement - # transform doesn't try to merge the second and the third inputs - crop_and_resize_node['inputs_preprocessed'] = True - graph.remove_edge(crop_and_resize_node.in_node(1).id, crop_and_resize_node.id) - graph.create_edge(proposal_reshape_2d_node, crop_and_resize_node, out_port=0, in_port=1) - - tf_proposal_reshape_4d_op = Reshape(graph, dict(dim=int64_array([-1, 1, max_proposals, 5]), nchw_layout=True)) - tf_proposal_reshape_4d_node = tf_proposal_reshape_4d_op.create_node([swapped_proposals_node], - dict(name="reshape_proposal_4d")) - - crop_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([1]), dim=int64_array([4]), - nchw_layout=True)) - crop_node = crop_op.create_node([tf_proposal_reshape_4d_node], dict(name='crop_proposals')) - - tf_proposals_crop_reshape_3d_op = Reshape(graph, dict(dim=int64_array([0, -1, 4]), nchw_layout=True)) - tf_proposals_crop_reshape_3d_node = tf_proposals_crop_reshape_3d_op.create_node([crop_node], - dict(name="reshape_crop_3d")) - - return {'proposal_node': tf_proposals_crop_reshape_3d_node} -``` -The main interest of the implementation of this replacer is the `generate_sub_graph` function. - -Lines 26-34: Parses the `pipeline.config` file and gets required parameters for the `Proposal` layer. - -Lines 38-57: Performs the following manipulations with the tensor with class predictions. TensorFlow uses the NHWC layout, while the Inference Engine uses NCHW. Model Optimizer by default performs transformations with all nodes data in the inference graph to convert it to the NCHW layout. The size of 'C' dimension of the tensor with class predictions is equal to \f$base\_anchors\_count \cdot 2\f$, where 2 corresponds to a number of classes (background and foreground) and \f$base\_anchors\_count\f$ is equal to number of anchors that are applied to each position of 'H' and 'W' dimensions. Therefore, there are \f$H \cdot W \cdot base\_anchors\_count\f$ bounding boxes. Lines 44-45 apply the Softmax layer to this tensor to get class probabilities for each bounding box. - -Lines 59-81: Reads topology parameters related to variances and anchors generation. - -Lines 83-108: Adds the `Proposal` layer to the graph. This layer has one input (generated in lines 104-105) which should be filled with three values before inference: input image height, input image width, image scale factor. - -Lines 110-132: Swaps output values of the `Proposal` layer if the parameter `do_not_swap_proposals` is not set to `True` in the sub-graph replacement configuration file for the replacer. - -Lines 134-144: Crops the output from the `Proposal` node to remove the batch indices (the Inference Engine implementation of the `Proposal` layer generates tensor with shape `[num_proposals, 5]`). The final tensor contains just box coordinates as in the TensorFlow implementation. - -#### SecondStagePostprocessor Block -The `SecondStagePostprocessor` block is similar to the `Postprocessor` block from the SSDs topologies. But there are a number of differences in conversion of the `SecondStagePostprocessor` block. 
- -```python -class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFileSubGraph): - """ - Replaces the sub-graph that is equal to the DetectionOutput layer from Inference Engine. This replacer is used for - Faster R-CNN, R-FCN and Mask R-CNN topologies conversion. - The replacer uses a value of the custom attribute 'coordinates_swap_method' from the sub-graph replacement - configuration file to choose how to swap box coordinates of the 0-th input of the generated DetectionOutput layer. - Refer to the code for more details. - """ - replacement_id = 'ObjectDetectionAPIDetectionOutputReplacement' - - def run_before(self): - return [ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement, Unpack, Sub] - - def run_after(self): - return [ObjectDetectionAPIProposalReplacement, CropAndResizeReplacement] - - def nodes_to_remove(self, graph: Graph, match: SubgraphMatch): - new_nodes_to_remove = match.matched_nodes_names().copy() - outputs = ['detection_boxes', 'detection_scores', 'num_detections'] - for output in outputs: - children = Node(graph, output).out_nodes() - if len(children) != 1: - log.warning('Output {} has {} children. It should have only one output: with op==`OpOutput`' - ''.format(output, len(children))) - elif children[list(children.keys())[0]].op == 'OpOutput': - new_nodes_to_remove.append(children[list(children.keys())[0]].id) - else: - continue - new_nodes_to_remove.extend(outputs) - return new_nodes_to_remove - - def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict): - # the DetectionOutput in IE produces single tensor, but in TF it produces four tensors, so we need to create - # only one output edge match - return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id} - - @staticmethod - def skip_nodes_by_condition(current_node: Node, condition: callable): - while condition(current_node): - current_node = current_node.in_node() - return current_node - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - - num_classes = _value_or_raise(match, pipeline_config, 'num_classes') - max_proposals = _value_or_raise(match, pipeline_config, 'first_stage_max_proposals') - activation_function = _value_or_raise(match, pipeline_config, 'postprocessing_score_converter') - - activation_conf_node = add_activation_function_after_node(graph, match.single_input_node(1)[0].in_node(0), - activation_function) - - # IE DetectionOutput layer consumes flattened tensors so need add a Reshape layer. - # The batch value of the input tensor is not equal to the batch of the topology, so it is not possible to use - # "0" value in the Reshape layer attribute to refer to the batch size, but we know how to - # calculate the second dimension so the batch value will be deduced from it with help of "-1". - reshape_conf_op = Reshape(graph, dict(dim=int64_array([-1, (num_classes + 1) * max_proposals]))) - reshape_conf_node = reshape_conf_op.create_node([activation_conf_node], dict(name='do_reshape_conf')) - - # Workaround for PermuteForReshape pass. - # We looking for first not Reshape-typed node before match.single_input_node(0)[0].in_node(0). - # And add reshape_loc node after this first not Reshape-typed node. 
- current_node = self.skip_nodes_by_condition(match.single_input_node(0)[0].in_node(0), - lambda x: x['kind'] == 'op' and x.soft_get('type') == 'Reshape') - - reshape_loc_op = Reshape(graph, dict(dim=int64_array([-1, num_classes, 1, 4]))) - reshape_loc_node = reshape_loc_op.create_node([current_node], dict(name='reshape_loc', nchw_layout=True)) - update_attrs(reshape_loc_node, 'shape_attrs', 'dim') - - # constant node with variances - variances_const_op = Const(graph, dict(value=_variance_from_pipeline_config(pipeline_config))) - variances_const_node = variances_const_op.create_node([]) - - # TF produces locations tensor without boxes for background. - # Inference Engine DetectionOutput layer requires background boxes so we generate them - loc_node = add_fake_background_loc(graph, reshape_loc_node) - PermuteAttrs.set_permutation(reshape_loc_node, loc_node, None) - - # reshape locations tensor to 2D so it could be passed to Eltwise which will be converted to ScaleShift - reshape_loc_2d_op = Reshape(graph, dict(dim=int64_array([-1, 4]))) - reshape_loc_2d_node = reshape_loc_2d_op.create_node([loc_node], dict(name='reshape_locs_2d', nchw_layout=True)) - PermuteAttrs.set_permutation(loc_node, reshape_loc_2d_node, None) - - # element-wise multiply locations with variances - eltwise_locs_op = Eltwise(graph, dict(operation='mul')) - eltwise_locs_node = eltwise_locs_op.create_node([reshape_loc_2d_node, variances_const_node], - dict(name='scale_locs')) - - # IE DetectionOutput layer consumes flattened tensors so need add a Reshape layer. - # The batch value of the input tensor is not equal to the batch of the topology, so it is not possible to use - # "0" value in the Reshape layer attribute to refer to the batch size, but we know how to - # calculate the second dimension so the batch value will be deduced from it with help of "-1". - reshape_loc_do_op = Reshape(graph, dict(dim=int64_array([-1, (num_classes + 1) * max_proposals * 4]))) - - custom_attributes = match.custom_replacement_desc.custom_attributes - coordinates_swap_method = 'add_convolution' - if 'coordinates_swap_method' not in custom_attributes: - log.error('The ObjectDetectionAPIDetectionOutputReplacement sub-graph replacement configuration file ' - 'must contain "coordinates_swap_method" in the "custom_attributes" dictionary. Two values are ' - 'supported: "swap_weights" and "add_convolution". The first one should be used when there is ' - 'a MatMul or Conv2D node before the "SecondStagePostprocessor" block in the topology. With this ' - 'solution the weights of the MatMul or Conv2D nodes are permutted, simulating the swap of XY ' - 'coordinates in the tensor. The second could be used in any other cases but it is worse in terms ' - 'of performance because it adds the Conv2D node which performs permutting of data. Since the ' - 'attribute is not defined the second approach is used by default.') - else: - coordinates_swap_method = custom_attributes['coordinates_swap_method'] - supported_swap_methods = ['swap_weights', 'add_convolution'] - if coordinates_swap_method not in supported_swap_methods: - raise Error('Unsupported "coordinates_swap_method" defined in the sub-graph replacement configuration ' - 'file. 
Supported methods are: {}'.format(', '.join(supported_swap_methods))) - - if coordinates_swap_method == 'add_convolution': - swapped_locs_node = add_convolution_to_swap_xy_coordinates(graph, eltwise_locs_node, 4) - reshape_loc_do_node = reshape_loc_do_op.create_node([swapped_locs_node], dict(name='do_reshape_locs')) - else: - reshape_loc_do_node = reshape_loc_do_op.create_node([eltwise_locs_node], dict(name='do_reshape_locs')) - - # find Proposal output which has the data layout as in TF: YXYX coordinates without batch indices. - proposal_nodes_ids = [node_id for node_id, attrs in graph.nodes(data=True) - if 'name' in attrs and attrs['name'] == 'crop_proposals'] - if len(proposal_nodes_ids) != 1: - raise Error("Found the following nodes '{}' with name 'crop_proposals' but there should be exactly 1. " - "Looks like ObjectDetectionAPIProposalReplacement replacement didn't work.". - format(proposal_nodes_ids)) - proposal_node = Node(graph, proposal_nodes_ids[0]) - - # check whether it is necessary to permute proposals coordinates before passing them to the DetectionOutput - # currently this parameter is set for the RFCN topologies - if 'swap_proposals' in custom_attributes and custom_attributes['swap_proposals']: - proposal_node = add_convolution_to_swap_xy_coordinates(graph, proposal_node, 4) - - # reshape priors boxes as Detection Output expects - reshape_priors_op = Reshape(graph, dict(dim=int64_array([-1, 1, max_proposals * 4]))) - reshape_priors_node = reshape_priors_op.create_node([proposal_node], - dict(name='DetectionOutput_reshape_priors_')) - - detection_output_op = DetectionOutput(graph, {}) - if coordinates_swap_method == 'swap_weights': - # update infer function to re-pack weights - detection_output_op.attrs['old_infer'] = detection_output_op.attrs['infer'] - detection_output_op.attrs['infer'] = __class__.do_infer - for key in ('clip_before_nms', 'clip_after_nms'): - if key in match.custom_replacement_desc.custom_attributes: - detection_output_op.attrs[key] = int(match.custom_replacement_desc.custom_attributes[key]) - - detection_output_node = detection_output_op.create_node( - [reshape_loc_do_node, reshape_conf_node, reshape_priors_node], - dict(name=detection_output_op.attrs['type'], share_location=0, variance_encoded_in_target=1, - code_type='caffe.PriorBoxParameter.CENTER_SIZE', pad_mode='caffe.ResizeParameter.CONSTANT', - resize_mode='caffe.ResizeParameter.WARP', - num_classes=num_classes, - confidence_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_score_threshold'), - top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_detections_per_class'), - keep_top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_total_detections'), - nms_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_iou_threshold'))) - # sets specific name to the node so we can find it in other replacers - detection_output_node.name = 'detection_output' - - output_op = Output(graph, dict(name='do_OutputOp')) - output_op.create_node([detection_output_node]) - - print('The graph output nodes "num_detections", "detection_boxes", "detection_classes", "detection_scores" ' - 'have been replaced with a single layer of type "Detection Output". 
Refer to IR catalogue in the ' - 'documentation for information about this layer.') - - return {'detection_output_node': detection_output_node} - - @staticmethod - def do_infer(node): - node.old_infer(node) - # compared to the IE's DetectionOutput, the TF keeps the locations in YXYX, need to get back to the XYXY - # for last matmul/Conv2D that operate the locations need to swap the X and Y for output feature weights & biases - swap_weights_xy(backward_bfs_for_operation(node.in_node(0), ['MatMul', 'Conv2D'])) -``` - -The differences in conversion are the following: - -* The locations tensor does not contain information about class 0 (background), but Inference Engine `DetectionOutput` layer expects it. Line 79 append dummy tensor with fake coordinates. -* The prior boxes tensor are not constant like in SSDs models, so it is not possible to apply the same solution. Instead, the element-wise multiplication is added to scale prior boxes tensor values with the variances values. The attribute `variance_encoded_in_target=1` is set to the `DetectionOutput` layer (lines 141-159). -* The X and Y coordinates in the tensor with bounding boxes locations adjustments should be swapped. For some topologies it could be done by updating preceding convolution weights, but if there is no preceding convolutional node, the Model Optimizer inserts convolution node with specific kernel and weights that performs coordinates swap during topology inference. -* Added marker node of type `OpOutput` that is used by the Model Optimizer to determine output nodes of the topology. It is used in the dead nodes elimination pass. - -#### Cutting Off Part of the Topology - -There is an ability to cut-off part of the topology using the `--output` command line parameter. Detailed information on why it could be useful is provided in the [Cutting Off Parts of a Model ](../Cutting_Model.md). The Faster R-CNN models are cut at the end using the sub-graph replacer `ObjectDetectionAPIOutputReplacement`. - -```python -class ObjectDetectionAPIOutputReplacement(FrontReplacementFromConfigFileGeneral): - """ - This replacer is used to cut-off the network by specified nodes for models generated with Object Detection API. - The custom attribute for the replacer contains one value for key "outputs". This string is a comma separated list - of outputs alternatives. Each output alternative is a '|' separated list of node name which could be outputs. The - first node from each alternative that exits in the graph is chosen. Others are ignored. - For example, if the "outputs" is equal to the following string: - - "Reshape_16,SecondStageBoxPredictor_1/Conv_3/BiasAdd|SecondStageBoxPredictor_1/Conv_1/BiasAdd" - - then the "Reshape_16" will be an output if it exists in the graph. The second output will be - SecondStageBoxPredictor_1/Conv_3/BiasAdd if it exist in the graph, if not then - SecondStageBoxPredictor_1/Conv_1/BiasAdd will be output if it exists in the graph. - """ - replacement_id = 'ObjectDetectionAPIOutputReplacement' - - def run_before(self): - return [ObjectDetectionAPIPreprocessorReplacement] - - def transform_graph(self, graph: Graph, replacement_descriptions: dict): - if graph.graph['cmd_params'].output is not None: - log.warning('User defined output nodes are specified. 
Skip the graph cut-off by the ' - 'ObjectDetectionAPIOutputReplacement.') - return - outputs = [] - outputs_string = replacement_descriptions['outputs'] - for alternatives in outputs_string.split(','): - for out_node_name in alternatives.split('|'): - if graph.has_node(out_node_name): - outputs.append(out_node_name) - break - else: - log.debug('A node "{}" does not exist in the graph. Do not add it as output'.format(out_node_name)) - _outputs = output_user_data_repack(graph, outputs) - add_output_ops(graph, _outputs, graph.graph['inputs']) -``` - -This is a replacer of type "general" which is called just once in comparison with other Front-replacers ("scope" and "points") which are called for each matched instance. The replacer reads node names that should become new output nodes, like specifying `--output `. The only difference is that the string containing node names could contain '|' character specifying output node names alternatives. Detailed explanation is provided in the class description in the code. - -The `detection_boxes`, `detection_scores`, `num_detections` nodes are specified as outputs in the `faster_rcnn_support.json` file. These nodes are used to remove part of the graph that is not be needed to calculate value of specified output nodes. - -### R-FCN topologies - -The R-FCN models are based on Faster R-CNN models so it is highly recommended to read the section about converting them first. Detailed information about R-FCN topologies is provided [in the abstract](https://arxiv.org/abs/1605.06409). - -#### Preprocessor Block - -R-FCN topologies contain similar `Preprocessor` block as SSD and Faster R-CNN topologies. The same `ObjectDetectionAPIPreprocessorReplacement` sub-graph replacer is used to cut it off. - -#### Proposal Layer - -Similar to Faster R-CNNs, R-FCN topologies contain implementation of Proposal layer before the `SecondStageBoxPredictor` block, so `ObjectDetectionAPIProposalReplacement` replacement is used in the sub-graph replacement configuration file. - -#### SecondStageBoxPredictor block - -The `SecondStageBoxPredictor` block differs from the self-titled block from Faster R-CNN topologies. It contains a number of `CropAndResize` operations consuming variously scaled boxes generated with a Proposal layer. The combination of `CropAndResize` layers located in the `while` loop forms a single position-sensitive ROI pooling (PSROIPooling) layer with bilinear interpolation. The `ObjectDetectionAPIPSROIPoolingReplacement` replacement matches two `while` loops with PSROIPooling layers applied to the blobs with box coordinates and classes predictions. 
- -```python -class ObjectDetectionAPIPSROIPoolingReplacement(FrontReplacementFromConfigFileSubGraph): - replacement_id = 'ObjectDetectionAPIPSROIPoolingReplacement' - - def run_after(self): - return [ObjectDetectionAPIProposalReplacement] - - def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict): - return {match.output_node(0)[0].id: new_sub_graph['output_node'].id} - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - num_classes = _value_or_raise(match, pipeline_config, 'num_classes') - - input_node = match.input_nodes(0)[0][0].in_node(0) - if 'class_predictions' in input_node.id: - psroipooling_output_dim = num_classes + 1 - else: - psroipooling_output_dim = num_classes * 4 - - num_spatial_bins_height = pipeline_config.get_param('num_spatial_bins_height') - num_spatial_bins_width = pipeline_config.get_param('num_spatial_bins_width') - crop_height = pipeline_config.get_param('crop_height') - crop_width = pipeline_config.get_param('crop_width') - if crop_height != crop_width: - raise Error('Different "crop_height" and "crop_width" parameters from the pipeline config are not ' - 'supported: {} vs {}'.format(crop_height, crop_width)) - psroipooling_op = PSROIPoolingOp(graph, {'name': input_node.soft_get('name') + '/PSROIPooling', - 'output_dim': psroipooling_output_dim, - 'group_size': crop_width / num_spatial_bins_width, - 'spatial_bins_x': num_spatial_bins_width, - 'spatial_bins_y': num_spatial_bins_height, - 'mode': 'bilinear', - 'spatial_scale': 1, - }) - - if 'reshape_swap_proposals_2d' in graph.nodes(): - reshape_swap_proposals_node = Node(graph, 'reshape_swap_proposals_2d') - else: - swap_proposals_node = add_convolution_to_swap_xy_coordinates(graph, Node(graph, 'proposals'), 5) - reshape_swap_proposals_node = Reshape(graph, {'dim': [-1, 5], 'nchw_layout': True, - 'name': 'reshape_swap_proposals_2d'}).create_node( - [swap_proposals_node]) - psroipooling_node = psroipooling_op.create_node([input_node, reshape_swap_proposals_node]) - - reduce_op = Reduce(graph, {'name': 'mean', - 'reduce_type': 'mean', - 'axis': int64_array([1, 2]), - 'keep_dims': True - }) - reduce_node = reduce_op.create_node([psroipooling_node]) - - graph.erase_node(match.output_node(0)[0].out_node()) - - return {'output_node': reduce_node} -``` - -The main interest of the implementation of this replacer is the `generate_sub_graph` function. - -Lines 12-15: Parses the `pipeline.config` file and gets required parameters for the `PSROIPooling` layer. -Lines 17-21: Determines number of output channels for the `PSROIPooling` layer for box coordinates and classes predictions. -Lines 23-46: Create `PSROIPooling` layer based on model parameters determined from the pipeline configuration file. -Lines 48-53: Add Reduce layer which is the output of the `while` loops being replaced. - -#### SecondStagePostprocessor block - -The `SecondStagePostprocessor` block implements functionality of the `DetectionOutput` layer from the Inference Engine. The `ObjectDetectionAPIDetectionOutputReplacement` sub-graph replacement is used to replace the block. For this type of topologies the replacer adds convolution node to swap coordinates of boxes in of the 0-th input tensor to the `DetectionOutput` layer. 
The custom attribute `coordinates_swap_method` is set to value `add_convolution` in the sub-graph replacement configuration file to enable that behaviour. A method (`swap_weights`) is not suitable for this type of topologies because there are no `Mul` or `Conv2D` operations before the 0-th input of the `DetectionOutput` layer. - -#### Cutting Off Part of the Topology - -The R-FCN models are cut at the end with the sub-graph replacer `ObjectDetectionAPIOutputReplacement` as Faster R-CNNs topologies using the following output node names: `detection_boxes`. - -### Mask R-CNN Topologies - -The Mask R-CNN models are based on Faster R-CNN models so it is highly recommended to read the section about converting them first. Detailed information about Mask R-CNN topologies is provided [in the abstract](https://arxiv.org/abs/1703.06870). - -#### Preprocessor Block - -Mask R-CNN topologies contain similar `Preprocessor` block as SSD and Faster R-CNN topologies. The same `ObjectDetectionAPIPreprocessorReplacement` sub-graph replacer is used to cut it off. - -#### Proposal and ROI (Region of Interest) Pooling - -Proposal and ROI Pooling layers are added to Mask R-CNN topologies like in Faster R-CNNs. - -#### DetectionOutput Layer - -Unlike in SSDs and Faster R-CNNs, the implementation of the `DetectionOutput` layer in Mask R-CNNs topologies is not separated in a dedicated scope. But the matcher is defined with start/end points defined in the `mask_rcnn_support.json` so the replacer correctly adds the `DetectionOutput` layer. - -#### One More ROIPooling - -There is the second `CropAndResize` (equivalent of `ROIPooling` layer) that uses boxes produced with the `DetectionOutput` layer. The `ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement` replacer is used to replace this node. - -```python -class ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement(FrontReplacementFromConfigFileSubGraph): - replacement_id = 'ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement' - - def run_after(self): - return [ObjectDetectionAPIProposalReplacement] - - def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict): - return {match.output_node(0)[0].id: new_sub_graph['roi_pooling_node'].id} - - def generate_sub_graph(self, graph: Graph, match: SubgraphMatch): - argv = graph.graph['cmd_params'] - if argv.tensorflow_object_detection_api_pipeline_config is None: - raise Error(missing_param_error) - pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config) - roi_pool_size = _value_or_raise(match, pipeline_config, 'initial_crop_size') - - detection_output_nodes_ids = [node_id for node_id, attrs in graph.nodes(data=True) - if 'name' in attrs and attrs['name'] == 'detection_output'] - if len(detection_output_nodes_ids) != 1: - raise Error("Found the following nodes '{}' with 'detection_output' but there should be exactly 1.". 
- format(detection_output_nodes_ids)) - detection_output_node = Node(graph, detection_output_nodes_ids[0]) - - # add reshape of Detection Output so it can be an output of the topology - reshape_detection_output_2d_op = Reshape(graph, dict(dim=int64_array([-1, 7]))) - reshape_detection_output_2d_node = reshape_detection_output_2d_op.create_node( - [detection_output_node], dict(name='reshape_do_2d')) - - # adds special node of type "Output" that is a marker for the output nodes of the topology - output_op = Output(graph, dict(name='do_reshaped_OutputOp')) - output_node = output_op.create_node([reshape_detection_output_2d_node]) - - # add attribute 'output_sort_order' so it will be used as a key to sort output nodes before generation of IR - output_node.in_edge()['data_attrs'].append('output_sort_order') - output_node.in_edge()['output_sort_order'] = [('detection_boxes', 0)] - - # creates two Crop operations which get input from the DetectionOutput layer, cuts of slices of data with class - # ids and probabilities and produce a tensor with batch ids and bounding boxes only (as it is expected by the - # ROIPooling layer) - crop_batch_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([0]), dim=int64_array([1]), - nchw_layout=True)) - crop_batch_node = crop_batch_op.create_node([detection_output_node], dict(name='crop_do_batch_ids')) - - crop_coordinates_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([3]), dim=int64_array([4]), - nchw_layout=True)) - crop_coordinates_node = crop_coordinates_op.create_node([detection_output_node], dict(name='crop_do_coords')) - - concat_op = Concat(graph, dict(axis=3)) - concat_node = concat_op.create_node([crop_batch_node, crop_coordinates_node], dict(name='batch_and_coords', - nchw_layout=True)) - - # reshape bounding boxes as required by ROIPooling - reshape_do_op = Reshape(graph, dict(dim=int64_array([-1, 5]))) - reshape_do_node = reshape_do_op.create_node([concat_node], dict(name='reshape_do')) - - roi_pooling_op = ROIPooling(graph, dict(method="bilinear", spatial_scale=1, - pooled_h=roi_pool_size, pooled_w=roi_pool_size)) - roi_pooling_node = roi_pooling_op.create_node([match.single_input_node(0)[0].in_node(), reshape_do_node], - dict(name='ROI_pooling_2')) - return {'roi_pooling_node': roi_pooling_node} -``` -The Inference Engine `DetectionOutput` layer implementation produces one tensor with seven numbers for each actual detection: - -* batch index -* class label -* class probability -* x_1 box coordinate -* y_1 box coordinate -* x_2 box coordinate -* y_2 box coordinate. - -The boxes coordinates must be fed to the `ROIPooling` layer, so the `Crop` layer is added to remove unnecessary part (lines 37-50). - -Then the result tensor is reshaped (lines 53-54) and `ROIPooling` layer is created (lines 56-59). - -#### Mask Tensors Processing - -The post-processing part of Mask R-CNN topologies filters out bounding boxes with low probabilities and applies activation function to the rest one. This post-processing is implemented using the `Gather` operation, which is not supported by the Inference Engine. Special Front-replacer removes this post-processing and just inserts the activation layer to the end. The filtering of bounding boxes is done in the dedicated demo `mask_rcnn_demo`. The code of the replacer is the following: - -```python -class ObjectDetectionAPIMaskRCNNSigmoidReplacement(FrontReplacementFromConfigFileGeneral): - """ - This replacer is used to convert Mask R-CNN topologies only. 
- Adds activation with sigmoid function to the end of the network producing masks tensors. - """ - replacement_id = 'ObjectDetectionAPIMaskRCNNSigmoidReplacement' - - def run_after(self): - return [ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement] - - def transform_graph(self, graph: Graph, replacement_descriptions): - output_node = None - op_outputs = [n for n, d in graph.nodes(data=True) if 'op' in d and d['op'] == 'OpOutput'] - for op_output in op_outputs: - last_node = Node(graph, op_output).in_node(0) - if last_node.name.startswith('SecondStageBoxPredictor'): - sigmoid_op = Activation(graph, dict(operation='sigmoid')) - sigmoid_node = sigmoid_op.create_node([last_node], dict(name=last_node.id + '/sigmoid')) - sigmoid_node.name = 'masks' - - if output_node is not None: - raise Error('Identified two possible outputs from the topology. Cannot proceed.') - # add special node of type "Output" that is a marker for the output nodes of the topology - output_op = Output(graph, dict(name=sigmoid_node.name + '/OutputOp')) - output_node = output_op.create_node([sigmoid_node]) - - print('The predicted masks are produced by the "masks" layer for each bounding box generated with a ' - '"detection_output" layer.\n Refer to IR catalogue in the documentation for information ' - 'about the DetectionOutput layer and Inference Engine documentation about output data interpretation.\n' - 'The topology can be inferred using dedicated demo "mask_rcnn_demo".') -``` -The replacer looks for the output node which name starts with 'SecondStageBoxPredictor' (the another node of type 'OpOutput' is located after the `DetectionOutput` node). This node contains the generated masks. The replacer adds activation layer 'Sigmoid' after this node as it is done in the initial TensorFlow* model. - -#### Cutting Off Part of the Topology - -The Mask R-CNN models are cut at the end with the sub-graph replacer `ObjectDetectionAPIOutputReplacement` using the following output node names: - -```SecondStageBoxPredictor_1/Conv_3/BiasAdd|SecondStageBoxPredictor_1/Conv_1/BiasAdd``` - -One of these two nodes produces output mask tensors. The child nodes of these nodes are related to post-processing which is implemented in the [Mask R-CNN demo](@ref omz_demos_mask_rcnn_demo_cpp) and should be cut off. +Implementation of the transformations for Object Detection API models is located in the file `/tools/model_optimizer/extensions/front/tf/ObjectDetectionAPI.py`. Refer to the code in this file to understand the details of the conversion process. diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md index cc121ab19e1ad9..ac706c664f2d1e 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md @@ -24,13 +24,15 @@ To get pb-file from the archive contents, you need to do the following. 1. 
Run commands
```sh
- cd ~
- mkdir XLNet-Base
- cd XLNet-Base
- git clone https://github.com/zihangdai/xlnet
- wget https://storage.googleapis.com/xlnet/released_models/cased_L-12_H-768_A-12.zip
- unzip cased_L-12_H-768_A-12.zip
- mkdir try_save
+cd ~
+mkdir XLNet-Base
+cd XLNet-Base
+git clone https://github.com/zihangdai/xlnet
+wget https://storage.googleapis.com/xlnet/released_models/cased_L-12_H-768_A-12.zip
+unzip cased_L-12_H-768_A-12.zip
+mkdir try_save
+cd xlnet
+sed -i "s/tf\.train\.Optimizer/tf\.train.Optimizer if tf.__version__ < '1.15' else tf.compat.v1.train.Optimizer/g" model_utils.py
```
diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md
index 60674b1c768ad8..40d592e4690d61 100644
--- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md
+++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md
@@ -67,7 +67,11 @@ git checkout ed60b90
```
3. Download [coco.names](https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names) file from the DarkNet website **OR** use labels that fit your task.
4. Download the [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) (for the YOLOv3 model) or [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) (for the YOLOv3-tiny model) file **OR** use your pre-trained weights with the same structure
-5. Run a converter:
+5. Install Pillow (the maintained fork of PIL), which is used by the conversion script in the repo:
+```sh
+pip install pillow
+```
+6. Run a converter:
- for YOLO-v3:
```sh
python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3.weights
@@ -88,7 +92,7 @@ python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weig
### Convert YOLOv3 TensorFlow Model to IR
-To solve the problems explained in the YOLOv3 architecture overview section, use the `yolo_v3.json` or `yolo_v3_tiny.json` (depending on a model) configuration file with custom operations located in the `/deployment_tools/model_optimizer/extensions/front/tf` repository.
+To solve the problems explained in the YOLOv3 architecture overview section, use the `yolo_v3.json` or `yolo_v3_tiny.json` (depending on a model) configuration file with custom operations located in the `/tools/model_optimizer/extensions/front/tf` repository.
It consists of several attributes:
```sh @@ -202,7 +206,7 @@ Converted TensorFlow YOLO model is missing `Region` layer and its parameters. Or file under the `[region]` title. To recreate the original model structure, use the corresponding yolo `.json` configuration file with custom operations and `Region` layer -parameters when converting the model to the IR. This file is located in the `/deployment_tools/model_optimizer/extensions/front/tf` directory. +parameters when converting the model to the IR. This file is located in the `/tools/model_optimizer/extensions/front/tf` directory. If chosen model has specific values of this parameters, create another configuration file with custom operations and use it for conversion. @@ -213,7 +217,7 @@ python3 ./mo_tf.py --input_model /.pb \ --batch 1 \ --scale 255 \ ---transformations_config /deployment_tools/model_optimizer/extensions/front/tf/.json +--transformations_config /tools/model_optimizer/extensions/front/tf/.json ``` where: diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index cda8458e4dd72f..567543a01a88dd 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -34,7 +34,7 @@ Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the optimized intermediate representation (IR) as described in the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md). This -mechanism is a core part of the Model Optimizer. The Model Optimizer itself uses it under the hood, being a huge set of examples on how to add custom logic to support your model. +mechanism is a core part of the Model Optimizer, which uses it under the hood, so the Model Optimizer itself is a huge set of examples for adding custom logic to support your model. There are several cases when the customization is needed: diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md index e4a71a8fdc9298..579437aeb5a98a 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md @@ -35,7 +35,7 @@ operation. Here is a simplified example of the extractor for the custom operation Proposal from Faster-R-CNN model mentioned above. The full code with additional checks is provided in the -`/deployment_tools/model_optimizer/extensions/front/caffe/proposal_python_ext.py`. The sample code uses +`/tools/model_optimizer/extensions/front/caffe/proposal_python_ext.py`. The sample code uses operation `ProposalOp` which corresponds to `Proposal` operation described in the [Available Operations Sets](../../../ops/opset.md) document. Refer to the source code below for a detailed explanation of the extractor. 
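Since the hunk above only references the extractor file, here is a minimal, hedged sketch (in Python) of the pattern such a Caffe Python layer extractor follows. The base class location, the `parse_param_str` helper, and the default attribute values are assumptions for illustration only; the authoritative code is in `proposal_python_ext.py`.

```python
# Hedged sketch, not the actual contents of proposal_python_ext.py.
from extensions.ops.proposal import ProposalOp               # operation mentioned in the text above
from mo.front.extractor import CaffePythonFrontExtractorOp   # assumed location of the base class


class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
    # Must match the "module.class" string of the Python layer in the Caffe prototxt
    op = 'rpn.proposal_layer.ProposalLayer'
    enabled = True

    @classmethod
    def extract(cls, node):
        # The Python layer keeps its parameters in 'param_str'; start from
        # illustrative defaults and override them with the parsed values.
        attrs = {
            'feat_stride': 16,
            'pre_nms_topn': 6000,
            'post_nms_topn': 300,
            'nms_thresh': 0.7,
        }
        attrs.update(cls.parse_param_str(node.pb.python_param.param_str))  # assumed base-class helper
        # Attach the attributes so the node is handled as a regular Proposal operation downstream.
        ProposalOp.update_node_stat(node, attrs)
        return cls.enabled
```

With an extractor like this placed next to the other Caffe front extensions (or supplied through the Model Optimizer `--extensions` option), the Python `Proposal` layer is recognized during conversion instead of being reported as an unsupported operation.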
diff --git a/docs/benchmarks/performance_benchmarks_faq.md b/docs/benchmarks/performance_benchmarks_faq.md index 2ff33612097b38..b833f03c531862 100644 --- a/docs/benchmarks/performance_benchmarks_faq.md +++ b/docs/benchmarks/performance_benchmarks_faq.md @@ -19,31 +19,34 @@ All of the performance benchmarks were generated using the open-sourced tool wit #### 6. What image sizes are used for the classification network models? The image size used in the inference depends on the network being benchmarked. The following table shows the list of input sizes for each network model. -| **Model** | **Public Network** | **Task** | **Input Size** (Height x Width) | -|------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------|-----------------------------|-----------------------------------| -| [bert-large-uncased-whole-word-masking-squad](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/intel/bert-large-uncased-whole-word-masking-squad-int8-0001) | BERT-large |question / answer |384| -| [deeplabv3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/deeplabv3) | DeepLab v3 Tf |semantic segmentation | 513x513 | -| [densenet-121-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/densenet-121-tf) | Densenet-121 Tf |classification | 224x224 | -| [facenet-20180408-102900-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/facenet-20180408-102900) | FaceNet TF | face recognition | 160x160 | -| [faster_rcnn_resnet50_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/faster_rcnn_resnet50_coco) | Faster RCNN Tf | object detection | 600x1024 | -| [googlenet-v1-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v1-tf) | GoogLeNet_ILSVRC-2012 | classification | 224x224 | -| [inception-v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v3) | Inception v3 Tf | classification | 299x299 | -| [mobilenet-ssd-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-ssd) | SSD (MobileNet)_COCO-2017_Caffe | object detection | 300x300 | -| [mobilenet-v1-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v1-1.0-224-tf) | MobileNet v1 Tf | classification | 224x224 | -| [mobilenet-v2-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-1.0-224) | MobileNet v2 Tf | classification | 224x224 | -| [mobilenet-v2-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-pytorch ) | Mobilenet V2 PyTorch | classification | 224x224 | -| [resnet-18-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-18-pytorch) | ResNet-18 PyTorch | classification | 224x224 | -| [resnet-50-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-pytorch) | ResNet-50 v1 PyTorch | classification | 224x224 | -| [resnet-50-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) | ResNet-50_v1_ILSVRC-2012 | classification | 224x224 | -| [se-resnext-50-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/se-resnext-50) | Se-ResNext-50_ILSVRC-2012_Caffe | classification | 224x224 | -| 
[squeezenet1.1-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/squeezenet1.1) | SqueezeNet_v1.1_ILSVRC-2012_Caffe | classification | 227x227 | -| [ssd300-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd300) | SSD (VGG-16)_VOC-2007_Caffe | object detection | 300x300 | -| [yolo_v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf) | TF Keras YOLO v3 Modelset | object detection | 300x300 | -| [yolo_v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf) | Yolo-V4 TF | object detection | 608x608 | -| [ssd_mobilenet_v1_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd_mobilenet_v1_coco) | ssd_mobilenet_v1_coco | object detection | 300x300 | -| [ssdlite_mobilenet_v2-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) | ssd_mobilenet_v2 | object detection | 300x300 | -| [unet-camvid-onnx-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/unet-camvid-onnx-0001/description/unet-camvid-onnx-0001.md) | U-Net | semantic segmentation | 368x480 | - +| **Model** | **Public Network** | **Task** | **Input Size** (Height x Width) | +|------------------------------------------------------------------------------------------------------------------------------------|------------------------------------|-----------------------------|-----------------------------------| +| [bert-large-uncased-whole-word-masking-squad](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/intel/bert-large-uncased-whole-word-masking-squad-int8-0001) | BERT-large |question / answer |384| +| [brain-tumor-segmentation-0001-MXNET](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/brain-tumor-segmentation-0001) | brain-tumor-segmentation-0001 | semantic segmentation | 128x128x128 | +| [brain-tumor-segmentation-0002-CF2](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/brain-tumor-segmentation-0002) | brain-tumor-segmentation-0002 | semantic segmentation | 128x128x128 | +| [deeplabv3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/deeplabv3) | DeepLab v3 Tf | semantic segmentation | 513x513 | +| [densenet-121-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/densenet-121-tf) | Densenet-121 Tf | classification | 224x224 | +| [facenet-20180408-102900-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/facenet-20180408-102900) | FaceNet TF | face recognition | 160x160 | +| [faster_rcnn_resnet50_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/faster_rcnn_resnet50_coco) | Faster RCNN Tf | object detection | 600x1024 | +| [inception-v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/googlenet-v4-tf) | Inception v4 Tf (aka GoogleNet-V4) | classification | 299x299 | +| [inception-v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v3) | Inception v3 Tf | classification | 299x299 | +| [mobilenet-ssd-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-ssd) | SSD (MobileNet)_COCO-2017_Caffe | object detection | 300x300 | +| [mobilenet-v2-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-1.0-224) | MobileNet v2 Tf | classification | 
224x224 | +| [mobilenet-v2-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-pytorch ) | Mobilenet V2 PyTorch | classification | 224x224 | +| [resnet-18-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-18-pytorch) | ResNet-18 PyTorch | classification | 224x224 | +| [resnet-50-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-pytorch) | ResNet-50 v1 PyTorch | classification | 224x224 | +| [resnet-50-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) | ResNet-50_v1_ILSVRC-2012 | classification | 224x224 | +| [se-resnext-50-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/se-resnext-50) | Se-ResNext-50_ILSVRC-2012_Caffe | classification | 224x224 | +| [squeezenet1.1-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/squeezenet1.1) | SqueezeNet_v1.1_ILSVRC-2012_Caffe | classification | 227x227 | +| [ssd300-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd300) | SSD (VGG-16)_VOC-2007_Caffe | object detection | 300x300 | +| [yolo_v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf) | Yolo-V4 TF | object detection | 608x608 | +| [ssd_mobilenet_v1_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd_mobilenet_v1_coco) | ssd_mobilenet_v1_coco | object detection | 300x300 | +| [ssdlite_mobilenet_v2-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) | ssdlite_mobilenet_v2 | object detection | 300x300 | +| [unet-camvid-onnx-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/unet-camvid-onnx-0001/description/unet-camvid-onnx-0001.md) | U-Net | semantic segmentation | 368x480 | +| [yolo-v3-tiny-tf](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/yolo-v3-tiny-tf) | YOLO v3 Tiny | object detection | 416x416 | +| [ssd-resnet34-1200-onnx](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/ssd-resnet34-1200-onnx) | ssd-resnet34 onnx model | object detection | 1200x1200 | +| [vgg19-caffe](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/vgg19-caffe2) | VGG-19 | classification | 224x224| + #### 7. Where can I purchase the specific hardware used in the benchmarking? Intel partners with various vendors all over the world. Visit the [Intel® AI: In Production Partners & Solutions Catalog](https://www.intel.com/content/www/us/en/internet-of-things/ai-in-production/partners-solutions-catalog.html) for a list of Equipment Makers and the [Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md) documentation. You can also remotely test and run models before purchasing any hardware by using [Intel® DevCloud for the Edge](http://devcloud.intel.com/edge/). 
diff --git a/docs/benchmarks/performance_benchmarks_openvino.md b/docs/benchmarks/performance_benchmarks_openvino.md index 456f593db14461..be7c46410d752f 100644 --- a/docs/benchmarks/performance_benchmarks_openvino.md +++ b/docs/benchmarks/performance_benchmarks_openvino.md @@ -29,81 +29,86 @@ Measuring inference performance involves many variables and is extremely use-cas \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly - \htmlonly - + \endhtmlonly + \htmlonly - + \endhtmlonly +\htmlonly + +\endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly \htmlonly - + \endhtmlonly + + ## Platform Configurations -Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.3. +Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.4. -Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of March 15, 2021 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure. +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of June 18, 2021 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure. Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex). @@ -127,15 +132,15 @@ Testing by Intel done on: see test date for each HW platform below. | Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | | Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | | BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc. | Intel Corporation | -| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.
0009.092820190230 | -| BIOS Release | April 12, 2019 | May 29, 2020 | September 28, 2019 | +| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.
0013.121520200651 | +| BIOS Release | April 12, 2019 | May 29, 2020 | December 15, 2020 | | BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
save & exit | Select optimized default settings,
change power policy
to "performance",
save & exit | | Batch size | 1 | 1 | 1 | Precision | INT8 | INT8 | INT8 | Number of concurrent inference requests | 4 | 5 | 32 -| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 -| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | -| CPU Price on Mach 15th, 2021, USD
Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) | +| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 +| Rated maximum TDP/socket in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) | **CPU Inference Engines (continue)** @@ -149,84 +154,104 @@ Testing by Intel done on: see test date for each HW platform below. | Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | | Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | | BIOS Vendor | Intel Corporation | Intel Corporation | Intel Corporation | -| BIOS Version | SE5C620.86B.02.01.
0009.092820190230 | SE5C620.86B.02.01.
0009.092820190230 | WLYDCRB1.SYS.0020.
P86.2103050636 | -| BIOS Release | September 28, 2019 | September 28, 2019 | March 5, 2021 | +| BIOS Version | SE5C620.86B.02.01.
0013.121520200651 | SE5C620.86B.02.01.
0013.121520200651 | WLYDCRB1.SYS.0020.
P86.2103050636 | +| BIOS Release | December 15, 2020 | December 15, 2020 | March 5, 2021 | | BIOS Settings | Select optimized default settings,
change power policy to "performance",
save & exit | Select optimized default settings,
change power policy to "performance",
save & exit | Select optimized default settings,
change power policy to "performance",
save & exit | | Batch size | 1 | 1 | 1 | | Precision | INT8 | INT8 | INT8 | | Number of concurrent inference requests |32 | 52 | 80 | -| Test Date | March 15, 2021 | March 15, 2021 | March 22, 2021 | -| Power dissipation, TDP in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | [270](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) | -| CPU Price, USD
Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) (on Mach 15th, 2021) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) (on Mach 15th, 2021) | [8,099](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) (on March 26th, 2021) | +| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 | +| Rated maximum TDP/socket in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | [270](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) | [8,099](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) | **CPU Inference Engines (continue)** -| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | 11th Gen Intel® Core™ i7-1185G7 | -| -------------------- | ----------------------------------- |--------------------------------------| --------------------------------| -| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | Intel Corporation
internal/Reference
Validation Platform | -| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz | -| Hyper Threading | ON | ON | ON | -| Turbo Setting | ON | ON | ON | -| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 3200MHz | -| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | -| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.8.0-05-generic | -| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation | -| BIOS Version | F11 | 505 | TGLSFWI1.R00.3425.
A00.2010162309 | -| BIOS Release | March 13, 2019 | December 17, 2019 | October 16, 2020 | -| BIOS Settings | Select optimized default settings,
set OS type to "other",
save & exit | Default Settings | Default Settings | -| Batch size | 1 | 1 | 1 | -| Precision | INT8 | INT8 | INT8 | -| Number of concurrent inference requests |4 | 24 | 4 | -| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 | -| Power dissipation, TDP in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-1) | -| CPU Price on Mach 15th, 2021, USD
Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-0) | +| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | +| -------------------- | ----------------------------------- |--------------------------------------| +| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | +| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | +| Hyper Threading | ON | ON | +| Turbo Setting | ON | ON | +| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | +| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | +| BIOS Version | F14c | 1004 | +| BIOS Release | March 23, 2021 | March 19, 2021 | +| BIOS Settings | Select optimized default settings,
set OS type to "other",
save & exit | Default Settings | +| Batch size | 1 | 1 | +| Precision | INT8 | INT8 | +| Number of concurrent inference requests |4 | 24 | +| Test Date | June 18, 2021 | June 18, 2021 | +| Rated maximum TDP/socket in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | +**CPU Inference Engines (continue)** +| | 11th Gen Intel® Core™ i7-1185G7 | 11th Gen Intel® Core™ i7-11850HE | +| -------------------- | --------------------------------|----------------------------------| +| Motherboard | Intel Corporation
internal/Reference
Validation Platform | Intel Corporation
internal/Reference
Validation Platform | +| CPU | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz | 11th Gen Intel® Core™ i7-11850HE @ 2.60GHz | +| Hyper Threading | ON | ON | +| Turbo Setting | ON | ON | +| Memory | 2 x 8 GB DDR4 3200MHz | 2 x 16 GB DDR4 3200MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04.4 LTS | +| Kernel Version | 5.8.0-05-generic | 5.8.0-050800-generic | +| BIOS Vendor | Intel Corporation | Intel Corporation | +| BIOS Version | TGLSFWI1.R00.3425.
A00.2010162309 | TGLIFUI1.R00.4064.
A01.2102200132 | +| BIOS Release | October 16, 2020 | February 20, 2021 | +| BIOS Settings | Default Settings | Default Settings | +| Batch size | 1 | 1 | +| Precision | INT8 | INT8 | +| Number of concurrent inference requests |4 | 4 | +| Test Date | June 18, 2021 | June 18, 2021 | +| Rated maximum TDP/socket in Watt | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html) | [45](https://ark.intel.com/content/www/us/en/ark/products/213799/intel-core-i7-11850h-processor-24m-cache-up-to-4-80-ghz.html) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html) | [395](https://ark.intel.com/content/www/us/en/ark/products/213799/intel-core-i7-11850h-processor-24m-cache-up-to-4-80-ghz.html) | **CPU Inference Engines (continue)** -| | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE | -| -------------------- | ---------------------------------- | ----------------------------------- | -| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX | -| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz | -| Hyper Threading | OFF | ON | -| Turbo Setting | ON | ON | -| Memory | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz | -| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | -| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | -| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | -| BIOS Version | 2401 | F3 | -| BIOS Release | July 12, 2019 | March 25, 2020 | -| BIOS Settings | Select optimized default settings,
save & exit | Select optimized default settings,
set OS type to "other",
save & exit | -| Batch size | 1 | 1 | -| Precision | INT8 | INT8 | -| Number of concurrent inference requests | 3 | 4 | -| Test Date | March 15, 2021 | March 15, 2021 | -| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | -| CPU Price on Mach 15th, 2021, USD
Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | +| | Intel® Core™ i3-8100 | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE | +| -------------------- |----------------------------------- | ---------------------------------- | ----------------------------------- | +| Motherboard | GIGABYTE* Z390 UD | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX | +| CPU | Intel® Core™ i3-8100 CPU @ 3.60GHz | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz | +| Hyper Threading | OFF | OFF | ON | +| Turbo Setting | OFF | ON | ON | +| Memory | 4 x 8 GB DDR4 2400MHz | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | +| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | +| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | +| BIOS Version | F8 | 2401 | F3 | +| BIOS Release | May 24, 2019 | July 12, 2019 | March 25, 2020 | +| BIOS Settings | Select optimized default settings,
set OS type to "other",
save & exit | Select optimized default settings,
save & exit | Select optimized default settings,
set OS type to "other",
save & exit | +| Batch size | 1 | 1 | 1 | +| Precision | INT8 | INT8 | INT8 | +| Number of concurrent inference requests | 4 | 3 | 4 | +| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 | +| Rated maximum TDP/socket in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)| [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | **CPU Inference Engines (continue)** -| | Intel Atom® x5-E3940 | Intel Atom® x6425RE | Intel® Core™ i3-8100 | -| -------------------- | --------------------------------------|------------------------------- |----------------------------------- | -| Motherboard | | Intel Corporation /
ElkhartLake LPDDR4x T3 CRB | GIGABYTE* Z390 UD | -| CPU | Intel Atom® Processor E3940 @ 1.60GHz | Intel Atom® x6425RE
Processor @ 1.90GHz | Intel® Core™ i3-8100 CPU @ 3.60GHz | -| Hyper Threading | OFF | OFF | OFF | -| Turbo Setting | ON | ON | OFF | -| Memory | 1 x 8 GB DDR3 1600MHz | 2 x 4GB DDR4 3200 MHz | 4 x 8 GB DDR4 2400MHz | -| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | -| Kernel Version | 5.3.0-24-generic | 5.8.0-050800-generic | 5.3.0-24-generic | -| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | American Megatrends Inc.* | -| BIOS Version | 5.12 | EHLSFWI1.R00.2463.
A03.2011200425 | F8 | -| BIOS Release | September 6, 2017 | November 22, 2020 | May 24, 2019 | -| BIOS Settings | Default settings | Default settings | Select optimized default settings,
set OS type to "other",
save & exit | -| Batch size | 1 | 1 | 1 | -| Precision | INT8 | INT8 | INT8 | -| Number of concurrent inference requests | 4 | 4 | 4 | -| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 | -| Power dissipation, TDP in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [12](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)| -| CPU Price, USD
Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) (on March 15th, 2021) | [59](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) (on March 26th, 2021) | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) (on March 15th, 2021) | +| | Intel Atom® x5-E3940 | Intel Atom® x6425RE | Intel® Celeron® 6305E | +| -------------------- | --------------------------------------|------------------------------- |----------------------------------| +| Motherboard | Intel Corporation
internal/Reference
Validation Platform | Intel Corporation
internal/Reference
Validation Platform | Intel Corporation
internal/Reference
Validation Platform | +| CPU | Intel Atom® Processor E3940 @ 1.60GHz | Intel Atom® x6425RE
Processor @ 1.90GHz | Intel® Celeron®
6305E @ 1.80GHz | +| Hyper Threading | OFF | OFF | OFF | +| Turbo Setting | ON | ON | ON | +| Memory | 1 x 8 GB DDR3 1600MHz | 2 x 4GB DDR4 3200MHz | 2 x 8 GB DDR4 3200MHz | +| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu 18.04.5 LTS | +| Kernel Version | 5.3.0-24-generic | 5.8.0-050800-generic | 5.8.0-050800-generic | +| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | Intel Corporation | +| BIOS Version | 5.12 | EHLSFWI1.R00.2463.
A03.2011200425 | TGLIFUI1.R00.4064.A02.2102260133 | +| BIOS Release | September 6, 2017 | November 22, 2020 | February 26, 2021 | +| BIOS Settings | Default settings | Default settings | Default settings | +| Batch size | 1 | 1 | 1 | +| Precision | INT8 | INT8 | INT8 | +| Number of concurrent inference requests | 4 | 4 | 4| +| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 | +| Rated maximum TDP/socket in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [12](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) | [15](https://ark.intel.com/content/www/us/en/ark/products/208072/intel-celeron-6305e-processor-4m-cache-1-80-ghz.html)| +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [59](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) |[107](https://ark.intel.com/content/www/us/en/ark/products/208072/intel-celeron-6305e-processor-4m-cache-1-80-ghz.html) | @@ -239,8 +264,8 @@ Testing by Intel done on: see test date for each HW platform below. | Batch size | 1 | 1 | | Precision | FP16 | FP16 | | Number of concurrent inference requests | 4 | 32 | -| Power dissipation, TDP in Watt | 2.5 | [30](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) | -| CPU Price, USD
Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from March 15, 2021) | [1180](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) (from March 15, 2021) | +| Rated maximum TDP/socket in Watt | 2.5 | [30](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) | +| CPU Price/socket on June 21, 2021, USD
Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) | [425](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) | | Host Computer | Intel® Core™ i7 | Intel® Core™ i5 | | Motherboard | ASUS* Z370-A II | Uzelinfo* / US-E1300 | | CPU | Intel® Core™ i7-8700 CPU @ 3.20GHz | Intel® Core™ i5-6600 CPU @ 3.30GHz | @@ -252,9 +277,9 @@ Testing by Intel done on: see test date for each HW platform below. | BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | | BIOS Version | 411 | 5.12 | | BIOS Release | September 21, 2018 | September 21, 2018 | -| Test Date | March 15, 2021 | March 15, 2021 | +| Test Date | June 18, 2021 | June 18, 2021 | -Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.3.html) +Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.4.html) \htmlonly