diff --git a/cmake/developer_package/download/dependency_solver.cmake b/cmake/developer_package/download/dependency_solver.cmake
index 9038f610035860..2f2ab192d3e82d 100644
--- a/cmake/developer_package/download/dependency_solver.cmake
+++ b/cmake/developer_package/download/dependency_solver.cmake
@@ -176,9 +176,9 @@ function(reset_deps_cache)
foreach(var_name IN LISTS ARGN)
unset(${var_name} CACHE)
endforeach()
- # foreach(var_name IN LISTS ARGN)
- # unset(ENV{${var_name}})
- # endforeach()
+ foreach(var_name IN LISTS ARGN)
+ unset(ENV{${var_name}})
+ endforeach()
endif()
endfunction()
diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md
index 7164c2cdf555c8..f04bdf3a21f8c0 100644
--- a/docs/install_guides/pypi-openvino-dev.md
+++ b/docs/install_guides/pypi-openvino-dev.md
@@ -1,7 +1,7 @@
# Intel® Distribution of OpenVINO™ Toolkit Developer Package
-
+Copyright © 2018-2021 Intel Corporation
> **LEGAL NOTICE**: Your use of this software and any required dependent software (the
-“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or
+“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or
license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details.
## Introduction
@@ -40,11 +40,7 @@ The table below lists the supported operating systems and Python* versions requi
## Install the Developer Package
-### Step 1. Install External Software Dependencies
-
-On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
-
-### Step 2. Set Up Python Virtual Environment
+### Step 1. Set Up Python Virtual Environment
To avoid dependency conflicts, use a virtual environment. Skip this
step only if you do want to install all dependencies globally.
@@ -62,7 +58,7 @@ On Windows:
python -m venv openvino_env
```
-### Step 3. Activate Virtual Environment
+### Step 2. Activate Virtual Environment
On Linux and macOS:
```sh
@@ -73,14 +69,14 @@ On Windows:
openvino_env\Scripts\activate
```
-### Step 4. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update pip to the Highest Version
Run the command below:
```sh
python -m pip install --upgrade pip
```
-### Step 5. Install the Package
+### Step 4. Install the Package
Run the command below:
@@ -88,7 +84,7 @@ Run the command below:
pip install openvino-dev
```
-### Step 6. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
Run the command below (this may take a few seconds):
```sh
@@ -97,6 +93,19 @@ pot -h
You will see the help message for the Post-Training Optimization Tool if the installation finished successfully.
+## Troubleshooting
+
+#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio"
+
+On Windows*, some dependencies may require compilation from source during installation. To resolve this issue, install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat the package installation.
+
+#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
+
+To resolve the missing external dependency on Ubuntu*, run the following command:
+```sh
+sudo apt-get install libpython3.7
+```
+
## Additional Resources
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
diff --git a/docs/install_guides/pypi-openvino-rt.md b/docs/install_guides/pypi-openvino-rt.md
index cfe95281fdfc30..6e22d74157cd28 100644
--- a/docs/install_guides/pypi-openvino-rt.md
+++ b/docs/install_guides/pypi-openvino-rt.md
@@ -1,7 +1,7 @@
# Intel® Distribution of OpenVINO™ Toolkit Runtime Package
-
+Copyright © 2018-2021 Intel Corporation
> **LEGAL NOTICE**: Your use of this software and any required dependent software (the
-“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or
+“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or
license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details.
## Introduction
@@ -37,11 +37,7 @@ The table below lists supported operating systems and Python* versions required
## Install the Runtime Package
-### Step 1. Install External Software Dependencies
-
-On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
-
-### Step 2. Set Up Python Virtual Environment
+### Step 1. Set Up Python Virtual Environment
To avoid dependency conflicts, use a virtual environment. Skip this
step only if you do want to install all dependencies globally.
@@ -55,7 +51,7 @@ python -m venv openvino_env
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of
`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
-### Step 3. Activate Virtual Environment
+### Step 2. Activate Virtual Environment
On Linux and macOS:
```sh
@@ -66,14 +62,14 @@ On Windows:
openvino_env\Scripts\activate
```
-### Step 4. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update pip to the Highest Version
Run the command below:
```sh
python -m pip install --upgrade pip
```
-### Step 5. Install the Package
+### Step 4. Install the Package
Run the command below:
@@ -81,7 +77,7 @@ Run the command below:
pip install openvino
```
-### Step 6. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
Run the command below:
```sh
@@ -90,6 +86,19 @@ python -c "from openvino.inference_engine import IECore"
You will not see any error messages if the installation finished successfully.
+## Troubleshooting
+
+#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio"
+
+On Windows*, some dependencies may require compilation from source during installation. To resolve this issue, install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat the package installation.
+
+#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
+
+To resolve the missing external dependency on Ubuntu*, run the following command:
+```sh
+sudo apt-get install libpython3.7
+```
+
## Additional Resources
- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).
diff --git a/docs/ops/normalization/BatchNormInference_1.md b/docs/ops/normalization/BatchNormInference_1.md
index 218111575bd91d..694a9989e9f0fb 100644
--- a/docs/ops/normalization/BatchNormInference_1.md
+++ b/docs/ops/normalization/BatchNormInference_1.md
@@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values
* *epsilon*
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
- * **Range of values**: a positive floating-point number
+ * **Range of values**: a floating-point number greater than or equal to zero
* **Type**: `float`
* **Default value**: none
* **Required**: *yes*
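
For context (the same attribute change appears in both BatchNormInference-1 and BatchNormInference-5): *epsilon* is added under the square root of the normalization denominator, and since the mini-batch variance is non-negative, any \f$\epsilon \ge 0\f$ is a valid (if, at exactly zero, less numerically safe) choice. A sketch of the standard formula, using \f$\gamma\f$ and \f$\beta\f$ for the scale and shift inputs:

\f[ \hat{x}_{i} = \frac{x_{i} - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon}}, \qquad y_{i} = \gamma \hat{x}_{i} + \beta \f]
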
diff --git a/docs/ops/normalization/BatchNormInference_5.md b/docs/ops/normalization/BatchNormInference_5.md
index cec26e4b2ecf16..f5019d08b2d37e 100644
--- a/docs/ops/normalization/BatchNormInference_5.md
+++ b/docs/ops/normalization/BatchNormInference_5.md
@@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values
* *epsilon*
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
- * **Range of values**: a positive floating-point number
+ * **Range of values**: a floating-point number greater than or equal to zero
* **Type**: `float`
* **Default value**: none
* **Required**: *yes*
diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake
index 0a0770f06ecfb0..4ce1ef3136550e 100644
--- a/inference-engine/cmake/dependencies.cmake
+++ b/inference-engine/cmake/dependencies.cmake
@@ -261,8 +261,8 @@ if (ENABLE_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
- set(GNA_VERSION "02.00.00.1191.0")
- set(GNA_HASH "a61b4a9133549b0a9f0b46d069f72906ced28bcbbe7d5c361e687645f53a1c8b")
+ set(GNA_VERSION "02.00.00.1226")
+ set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake
index 958ea9b23a74ca..a4960ce3430d8d 100644
--- a/inference-engine/cmake/ie_parallel.cmake
+++ b/inference-engine/cmake/ie_parallel.cmake
@@ -25,9 +25,9 @@ function(set_ie_threading_interface_for TARGET_NAME)
else()
find_dependency(TBB COMPONENTS tbb tbbmalloc)
endif()
- set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
- set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
- set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+ set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
+ set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+ set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
if (NOT TBB_FOUND)
ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\
SEQ method will be used.")
diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie_blob.h
index dbe264c054de7f..db7c29c950877c 100644
--- a/inference-engine/include/ie_blob.h
+++ b/inference-engine/include/ie_blob.h
@@ -799,6 +799,7 @@ class TBlob : public MemoryBlob {
}
};
+#ifdef __clang__
extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
@@ -813,6 +814,7 @@ extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob);
+#endif // __clang__
/**
* @brief Creates a blob with the given tensor descriptor.
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 0bea81efacea19..4aa53beb1e5a86 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -70,6 +70,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
.add(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false))
+ .add(LayerTransformation::Params(params)
+ .setSupportAsymmetricQuantization(false)
+ .setDeconvolutionSpecificChannelsRatio(true))
// INT8 StridedSlice not supported
.remove());
diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index 728efcdd684687..f6bc926a35ada6 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -1784,7 +1784,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|| (component[i - 1].operation == kDnnConvolutional1dOp)
|| (component[i - 1].operation == kDnnConvolutional2dOp)
|| ((component[i - 1].operation == kDnnMaxPoolOp) &&
- (component[i - 2].operation == kDnnConvolutional1dOp))) {
+ (component[i - 2].operation == kDnnConvolutional1dOp || component[i - 2].operation == kDnnConvolutional2dOp))) {
if (gnaOperation->Operands[PwlOpIdx] == nullptr) {
HelperGna2OperationSetOperand(gnaOperation, gnaUserAllocator, gnaUserFree, PwlOpIdx, createGna2TensorPwl(1, nullptr));
}
diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
index 98257eb3687939..cef6e26537a29c 100644
--- a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
+++ b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
@@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
}
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
- return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w);
+ return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
}
RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {
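
The one-line fix above matters because the previous code validated the width against the height limit, so out-of-range widths passed silently whenever the height range was the wider one. A minimal standalone sketch of the intended check, using simplified stand-in types rather than the plugin's actual RangeLimit classes:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Simplified stand-ins for the GNA plugin's range-limit helpers.
struct RangeLimit {
    uint32_t min, max;
    std::string what;
    std::string GetErrorOrEmpty(uint32_t v) const {
        if (v < min || v > max)
            return what + " value " + std::to_string(v) + " is out of range\n";
        return {};
    }
};

struct RangeLimit2D {
    RangeLimit hLimit, wLimit;
    std::string GetErrorOrEmpty(uint32_t h, uint32_t w) const {
        // Width must be validated against wLimit; checking it against hLimit
        // (the previous behaviour) accepts bad widths when the height range is wider.
        return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
    }
};

int main() {
    RangeLimit2D limits{{1, 16, "height"}, {1, 8, "width"}};
    std::cout << limits.GetErrorOrEmpty(10, 12);  // reports a width violation
}
```
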
diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index 01e0d7e80befb9..cbfc47f57aab0b 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -156,24 +156,42 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
}
bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
- auto devVersion = getExecutionTargetDevice();
- return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(devVersion);
+ const auto compileTargetDevice = getTargetDevice(false);
+ return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(compileTargetDevice);
}
-Gna2DeviceVersion GNADeviceHelper::getExecutionTargetDevice() const {
+namespace {
const volatile auto Gna2DeviceVersion3_0 = static_cast(0x30);
- if (executionTarget.empty()) {
- if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
- return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
- return detectedGnaDevVersion;
- } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
+} // namespace
+
+Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const bool execTarget) const {
+ auto parsed = Gna2DeviceVersion2_0;
+ auto throwUnsupportedGnaTarget = [&](std::string extraSuffix) {
+ auto key = execTarget ? InferenceEngine::GNAConfigParams::KEY_GNA_EXEC_TARGET : InferenceEngine::GNAConfigParams::KEY_GNA_COMPILE_TARGET;
+ THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix;
+ };
+ if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
if (!isGnaLibVersion2_1)
- THROW_GNA_EXCEPTION << "Unsupported GNA execution target " << executionTarget << " when GNA Library version is 2.0.X.Y";
- return Gna2DeviceVersion3_0;
- } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
- return Gna2DeviceVersion2_0;
+ throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y");
+ parsed = Gna2DeviceVersion3_0;
+ } else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
+ throwUnsupportedGnaTarget("");
}
- THROW_GNA_EXCEPTION << "Unknown execution target: \"" << executionTarget << "\"";
+ return parsed;
+}
+
+Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
+ if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
+ return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
+ return detectedGnaDevVersion;
+}
+
+Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const {
+ const auto declared = execTarget ? executionTarget : compileTarget;
+ if (declared.empty()) {
+ return execTarget ? getDefaultTarget() : getTargetDevice(true);
+ }
+ return parseDeclaredTarget(declared, execTarget);
}
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
@@ -186,7 +204,7 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
// (bit exactly) as on the selected GNA execution target generation.
// See the GNA Plugin's GNA_EXEC_TARGET config option description.
if (swExactMode) {
- const auto consistentDevice = getExecutionTargetDevice();
+ const auto consistentDevice = getTargetDevice(true);
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast(consistentDevice)) + ")");
}
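
The refactored helpers resolve a GNA target in three steps: parse an explicitly declared target, otherwise fall back to the detected device (or, under software emulation, to the newest generation the library supports), and let an unset compile target inherit the execution target. A rough self-contained sketch of that fallback order, with plain strings standing in for the real Gna2DeviceVersion and config-key types:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Illustrative stand-ins; the real code works with Gna2DeviceVersion values.
static std::string parseDeclaredTarget(const std::string& target) {
    if (target == "GNA_TARGET_3_0") return "3.0";
    if (target == "GNA_TARGET_2_0") return "2.0";
    throw std::runtime_error("Unsupported GNA target: " + target);
}

static std::string getDefaultTarget(const std::string& detectedDevice) {
    // Software emulation (no detected HW) falls back to the newest supported generation.
    return detectedDevice.empty() ? "3.0 (sw emulation default)" : detectedDevice;
}

// execTarget == true  -> resolve the execution target
// execTarget == false -> resolve the compile target (inherits the execution target)
static std::string getTargetDevice(bool execTarget,
                                   const std::string& executionTarget,
                                   const std::string& compileTarget,
                                   const std::string& detectedDevice) {
    const std::string& declared = execTarget ? executionTarget : compileTarget;
    if (declared.empty())
        return execTarget ? getDefaultTarget(detectedDevice)
                          : getTargetDevice(true, executionTarget, compileTarget, detectedDevice);
    return parseDeclaredTarget(declared);
}

int main() {
    // No compile target declared: it inherits the declared execution target ("3.0").
    std::cout << getTargetDevice(false, "GNA_TARGET_3_0", "", "2.0 HW") << "\n";
}
```
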
diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp
index 831b9fde517352..e032e5532dafc3 100644
--- a/inference-engine/src/gna_plugin/gna_device.hpp
+++ b/inference-engine/src/gna_plugin/gna_device.hpp
@@ -145,7 +145,6 @@ class GNADeviceHelper {
return dev <= Gna2DeviceVersion2_0 && isGnaHw(dev);
}
bool enforceLegacyCnnNeeded() const;
- Gna2DeviceVersion getExecutionTargetDevice() const;
static void checkGna2Status(Gna2Status status, const std::string& from);
static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel);
#endif
@@ -197,6 +196,9 @@ class GNADeviceHelper {
static const std::map , const std::string > operandTypes;
static void enforceLegacyCnns(Gna2Model& gnaModel);
+ Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const;
+ Gna2DeviceVersion getDefaultTarget() const;
+ Gna2DeviceVersion getTargetDevice(bool execTarget) const;
#endif
void setOMPThreads(uint8_t const n_threads);
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index b085dcef7f46c3..2dcac40afc6a65 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -1027,13 +1027,8 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto layerInfo = LayerInfo(concatParent);
// auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
if (layerInfo.isInput()) {
- auto & bytesAllocated = inputDesc->bytes_allocated_for_input[((InferenceEngine::CNNLayerPtr)layerInfo)->name];
-
connectInput(layer, &concatLayerInfo.gna_ptr,
- concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
-
- // TODO: currently connectInput api accept only total size, for concat we need extension for allocated, and actual sizes
- bytesAllocated = inputLayer.tensorSize;
+ inputLayer.tensorSize, inputLayer.offset, idx, false);
concatLayerInfo.input_allocated = true;
} else if (layerInfo.isMemory()) {
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index 9832d59d527a70..1b1019767f242f 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -54,6 +54,7 @@
#include
#include
#include
+#include
#include "transformations/remove_extra_reshapes.hpp"
@@ -662,6 +663,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass();
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass();
+ manager.register_pass();
manager.register_pass();
manager.register_pass();
manager.register_pass();
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 52bc0d1d43fe63..35c9d2206a642e 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -1189,7 +1189,7 @@ void InsertConcatAligningFilterPass::run() {
getCreatorLayer(outData) = filterWithQuant;
filterWithQuant->outData.push_back(outData);
- CNNNetworkInsertLayer(prevLayer, l, filterWithQuant);
+ CNNNetworkInsertLayer(prevLayer, l, filterWithQuant, invalid_data_idx, input_idx);
}
offset += outputSize;
}
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index 99dfa1b64010f2..68e0f131721af4 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -201,7 +201,6 @@ if(WIN32)
endif()
target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES}
- inference_engine_snippets
inference_engine_transformations pugixml)
target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
diff --git a/inference-engine/src/inference_engine/ie_common.cpp b/inference-engine/src/inference_engine/ie_common.cpp
index c10c7a6c7bc3e4..effee536d59993 100644
--- a/inference-engine/src/inference_engine/ie_common.cpp
+++ b/inference-engine/src/inference_engine/ie_common.cpp
@@ -124,19 +124,19 @@ TBlob::~TBlob() {
free();
}
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
-template class TBlob;
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
+template class INFERENCE_ENGINE_API_CLASS(TBlob);
} // namespace InferenceEngine
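
Both TBlob hunks follow the usual C++ pattern of declaring `extern template` in the header (to suppress implicit instantiation in every including translation unit) and defining exported explicit instantiations in a single source file. A generic sketch of that pattern with made-up `Widget`/`MY_API` names rather than the actual Inference Engine macros:

```cpp
// widget.hpp -- template definition plus "extern template" declarations.
#pragma once

#if defined(_WIN32)
#  define MY_API __declspec(dllexport)  // simplified; real macros switch between export and import
#else
#  define MY_API
#endif

template <typename T>
class Widget {
public:
    T value() const { return v_; }
private:
    T v_{};
};

// Suppress implicit instantiation in including translation units; the
// definitions live in exactly one .cpp file and are exported from there.
extern template class Widget<float>;
extern template class Widget<int>;

// widget.cpp -- the one translation unit owning the exported instantiations.
// #include "widget.hpp"
template class MY_API Widget<float>;
template class MY_API Widget<int>;

int main() {
    Widget<float> w;
    return static_cast<int>(w.value());  // uses the explicitly instantiated specialization
}
```
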
diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt
index b03c329ca44cf3..ca65d596e60868 100644
--- a/inference-engine/src/legacy_api/CMakeLists.txt
+++ b/inference-engine/src/legacy_api/CMakeLists.txt
@@ -40,7 +40,6 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
${PUBLIC_HEADERS_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/src
${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl
- $
$
$
$
@@ -61,7 +60,7 @@ add_library(${TARGET_NAME} SHARED
ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Legacy library")
-target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets
+target_link_libraries(${TARGET_NAME} PUBLIC inference_engine
PRIVATE pugixml openvino::itt
${NGRAPH_LIBRARIES} inference_engine_transformations)
diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
index 0b66531044a62b..6c76ac47e0222a 100644
--- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
+++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
@@ -39,7 +39,6 @@
#include "legacy/ngraph_ops/rnn_sequence_ie.hpp"
#include "legacy/ngraph_ops/lstm_sequence_ie.hpp"
#include "legacy/ngraph_ops/gru_sequence_ie.hpp"
-#include "snippets/op/subgraph.hpp"
#include "exec_graph_info.hpp"
#include "caseless.hpp"
@@ -1979,15 +1978,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptrparams[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames;
}
- if (auto subgraph = ::ngraph::as_type_ptr(layer)) {
- std::string names = "";
- for (const auto& op : subgraph->get_body()->get_ordered_ops()) {
- names += ", " + op->get_friendly_name();
- }
-
- cnnLayer->params["originalLayersNames"] += names;
- }
-
std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer);
if (!primitivesPriority.empty()) {
cnnLayer->params["PrimitivesPriority"] = primitivesPriority;
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
new file mode 100644
index 00000000000000..d6bbe504dc6eea
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include "weightable_layer_transformation.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
+public:
+ ConvolutionBackpropDataTransformation(const Params& params);
+ void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+ bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+ bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override;
+ bool isQuantized(std::shared_ptr layer) const noexcept override;
+};
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
index 36b1293cd425b3..06a37ab8b22015 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
+ explicit DataPrecision(const element::Type& precision) {
+ this->precision = precision;
+ min = getMinValue(precision, 256);
+ max = getMaxValue(precision, 256);
+ hasZeroPoint = false;
+ }
+
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
precision(precision),
min(min),
@@ -122,29 +129,6 @@ class TRANSFORMATIONS_API DataPrecision {
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
return signedInterval ? element::i8 : element::u8;
}
-
- static float getMin(const size_t quantizationLevels, const bool signedInterval) {
- if (quantizationLevels == 255) {
- return signedInterval ? -127.0f : 0.0f;
- } else if (quantizationLevels == 256) {
- return signedInterval ? -128.0f : 0.0f;
- } else {
- // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
- // FIXME: not completed
- return signedInterval ? -128.0f : 0.0f;
- }
- }
-
- static float getMax(const size_t quantizationLevels, const bool signedInterval) {
- if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
- return signedInterval ? 127.0f : 255.0f;
- } else {
- // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
- // FIXME: not completed
- // return quantizationLevels - 1.0;
- return signedInterval ? 127.0f : 255.0f;
- }
- }
};
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@@ -181,7 +165,8 @@ class TRANSFORMATIONS_API LayerTransformation {
std::vector precisionsOnActivations = { element::u8, element::i8 },
std::vector precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32,
- bool support3DTensorOnActivations = true) :
+ bool support3DTensorOnActivations = true,
+ bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
@@ -189,7 +174,8 @@ class TRANSFORMATIONS_API LayerTransformation {
precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision),
- support3DTensorOnActivations(support3DTensorOnActivations) {
+ support3DTensorOnActivations(support3DTensorOnActivations),
+ deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
}
@@ -234,6 +220,11 @@ class TRANSFORMATIONS_API LayerTransformation {
return *this;
}
+ Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
+ this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
+ return *this;
+ }
+
bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@@ -242,6 +233,7 @@ class TRANSFORMATIONS_API LayerTransformation {
std::vector precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
+ bool deconvolutionSpecificChannelsRatio;
};
class PrecisionDetails {
@@ -318,6 +310,7 @@ class TRANSFORMATIONS_API LayerTransformation {
std::vector precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
+ bool deconvolutionSpecificChannelsRatio;
// absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold;
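
The new single-argument `DataPrecision` constructor derives its bounds from the precision assuming 256 levels, which for the 8-bit types reproduces the values that were hard-coded in the removed `getMin()`/`getMax()` helpers: [-128, 127] for a signed interval and [0, 255] for an unsigned one. A trivial check of those ranges:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
    // 256 quantization levels span the full 8-bit range, matching the values
    // from the deleted getMin()/getMax() helpers.
    std::cout << "i8: [" << int(std::numeric_limits<std::int8_t>::min()) << ", "
              << int(std::numeric_limits<std::int8_t>::max()) << "]\n";      // [-128, 127]
    std::cout << "u8: [" << unsigned(std::numeric_limits<std::uint8_t>::min()) << ", "
              << unsigned(std::numeric_limits<std::uint8_t>::max()) << "]\n";  // [0, 255]
}
```
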
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
index 9846ef50d6aa2d..8cf52a13fe20ca 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
@@ -109,7 +109,8 @@ class TRANSFORMATIONS_API NetworkHelper {
const float max,
const bool hasZeroPoint,
const bool updatePrecision,
- const element::Type deqPrecision = element::f32);
+ const element::Type deqPrecision = element::f32,
+ const size_t outChannelsShapeIndex = 0);
static std::shared_ptr updateFakeQuantize(
std::shared_ptr fq,
@@ -183,7 +184,7 @@ class TRANSFORMATIONS_API NetworkHelper {
static std::shared_ptr toScalarIfPossible(std::shared_ptr node);
static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq);
- static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues);
+ static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues, int outChannelsShapeIndex = 0);
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace = false);
@@ -191,8 +192,16 @@ class TRANSFORMATIONS_API NetworkHelper {
static std::shared_ptr fuseConvert(const std::shared_ptr& fakeQuantize);
+ static std::vector precisionIntersection(
+ const std::vector& v1,
+ const std::vector& v2) noexcept;
+
private:
- static std::shared_ptr foldFakeQuantize(const std::shared_ptr& fq, const bool roundValues, const bool roundValuesWasSet);
+ static std::shared_ptr foldFakeQuantize(
+ const std::shared_ptr& fq,
+ const bool roundValues,
+ const bool roundValuesWasSet,
+ int outChannelsShapeIndex = 0);
// 1 - on weights
// 0 - weightable layer was not found
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
index 7a10d1daeb1b74..8de3fba36d5906 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
@@ -303,10 +303,6 @@ class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILaye
std::map>> transformations,
GraphRewrite& pass,
TransformationContext& context);
-
- std::vector precisionIntersection(
- const std::vector& v1,
- const std::vector& v2) const noexcept;
};
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
index 94b81f2b2af785..aeb0a6d9abd576 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
@@ -22,7 +22,7 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma
bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override;
protected:
- void decomposeFakeQuantizeForWeightsPath(std::shared_ptr weightableLayer) const;
+ void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
static bool isGroup(const std::shared_ptr& node);
static bool isDepthwise(const std::shared_ptr& node);
diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp
index 85aef194893107..915e87d2f60803 100644
--- a/inference-engine/src/low_precision_transformations/src/add.cpp
+++ b/inference-engine/src/low_precision_transformations/src/add.cpp
@@ -42,6 +42,7 @@ std::shared_ptr replaceToSubtract(const std::shared_ptr&
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
if (is_type(parent) ||
is_type(parent) ||
+ is_type(parent) ||
(is_type(parent) &&
(is_type(parent->get_input_node_ptr(0)) || is_type(parent->get_input_node_ptr(1))))) {
return nullptr;
diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp
index 24cc5940c1bb1f..4988e29b1e289a 100644
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
return false;
}
- DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
- if (dataPrecision.precision == ngraph::element::undefined) {
+ std::vector concatParentsChildrensPrecisions = precisionsOnActivations;
+ fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
+ if (concatParentsChildrensPrecisions.empty()) {
return false;
}
- std::unordered_map dequantizations;
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
- const std::shared_ptr fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]);
+ fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]);
if (fq == nullptr) {
return false;
}
@@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
if (quantizationDetails.inputHighValues.size() != 1ul) {
return false;
}
+ std::vector fqChildrensPrecisions = precisionsOnActivations;
+ fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
+ concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
- const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
- if (dataPrecision2.precision == ngraph::element::undefined) {
+ if (concatParentsChildrensPrecisions.empty()) {
return false;
}
-
- if (dataPrecision.precision != dataPrecision2.precision) {
- // quantization levels are the same, difference can be in sign
- // wider interval (precision) is preferable: use signed if least one interval is signed
- dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
- }
}
- if (dataPrecision.precision == ngraph::element::undefined) {
- return false;
+ DataPrecision dataPrecision;
+ if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
+ dataPrecision = DataPrecision(element::i8);
+ } else {
+ dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
}
std::vector quantizationLayersDetails;
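
The reworked concat logic intersects the precisions acceptable around every FakeQuantize feeding the concat (via the new `NetworkHelper::precisionIntersection`, defined later in this patch) and then picks one precision for the whole subgraph, preferring signed `i8` when it is available. A reduced sketch of that selection policy over plain strings instead of `ngraph::element::Type` values:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

// Stand-in for NetworkHelper::precisionIntersection: sorted copies + std::set_intersection.
static std::vector<std::string> intersect(std::vector<std::string> a, std::vector<std::string> b) {
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    std::vector<std::string> out;
    std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(out));
    return out;
}

int main() {
    // Precisions each FakeQuantize branch can work with (illustrative values).
    std::vector<std::vector<std::string>> perBranch = {{"i8", "u8"}, {"u8", "i8"}, {"i8"}};

    std::vector<std::string> common = perBranch.front();
    for (std::size_t i = 1; i < perBranch.size(); ++i)
        common = intersect(common, perBranch[i]);

    if (common.empty())
        std::cout << "no common precision: concat is not transformed\n";
    else if (std::find(common.begin(), common.end(), "i8") != common.end())
        std::cout << "selected i8\n";                 // signed precision preferred when available
    else
        std::cout << "selected " << common.front() << "\n";
}
```
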
diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
index 62d958d22b4037..dc81d51cd717de 100644
--- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
@@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector& concat : concatLayers) {
const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
for (const std::shared_ptr& child : children) {
- if (is_type(child.get())) {
+ if ((is_type(child.get()) ||
+ is_type(child.get())) &&
+ this->layerTransformationsManager->isQuantized(child)) {
return false;
}
}
diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp
index ff5ca944df5796..6496ee4ee54eab 100644
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) {
- return false;
+ auto weightInput = convolution->get_input_node_shared_ptr(1);
+ std::shared_ptr reshapeFromWeights = as_type_ptr(weightInput);
+ FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
+ NetworkHelper::getDequantization(convolution, 1ul) :
+ NetworkHelper::getDequantization(reshapeFromWeights);
+ if (dequantization.empty()) {
+ const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
+ std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
+ if (reshapeFromWeights != nullptr) {
+ resultConstant = fold_reshape(
+ resultConstant,
+ reshapeFromWeights->input_value(1),
+ false);
+ }
+ if (as_type_ptr(resultConstant)) {
+ replace_node(weightInput, resultConstant);
+ }
+ } else {
+ NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
+ }
+ return true;
}
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
diff --git a/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp
new file mode 100644
index 00000000000000..a73ee1de155781
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp
@@ -0,0 +1,218 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/convolution_backprop_data.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "low_precision/network_helper.hpp"
+#include "low_precision/common/dequantization_op.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
+}
+
+void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
+ addPattern(
+ pass,
+ context,
+ make_op_pattern({ make_op_label(), make_op_label() }));
+ addPattern(
+ pass,
+ context,
+ make_op_pattern({ make_op_label(), make_op_label() }));
+ addPattern(
+ pass,
+ context,
+ make_op_pattern(
+ { make_op_label(), make_op_label(), make_op_label() }));
+ addPattern(
+ pass,
+ context,
+ make_op_pattern(
+ { make_op_label(), make_op_label(), make_op_label() }));
+}
+
+bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr layer) const noexcept {
+ if (deconvolutionSpecificChannelsRatio) {
+ size_t inputChannels = layer->get_input_shape(0)[1];
+ size_t outputChannels = layer->get_output_shape(0)[1];
+ if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
+ return false;
+ }
+ }
+ return WeightableLayerTransformation::isQuantized(layer, false);
+}
+
+bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
+ auto convolutionBackpropData = m.get_match_root();
+
+ if (!canBeTransformed(context, convolutionBackpropData)) {
+ auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
+ std::shared_ptr reshapeFromWeights = as_type_ptr(weightsInput);
+ FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
+ NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
+ NetworkHelper::getDequantization(reshapeFromWeights);
+ if (dequantization.empty()) {
+ const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
+ std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
+ if (reshapeFromWeights != nullptr) {
+ resultConstant = fold_reshape(
+ resultConstant,
+ reshapeFromWeights->input_value(1),
+ false);
+ }
+ if (as_type_ptr(resultConstant)) {
+ replace_node(weightsInput, resultConstant);
+ }
+ } else {
+ NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
+ }
+ return true;
+ }
+
+ convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
+ FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
+ {
+ if (dequantization.subtract != nullptr) {
+ std::shared_ptr layer = dequantization.subtract;
+ ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
+
+ NetworkHelper::optimizeSubtract(dequantization.subtract);
+ }
+ std::shared_ptr reducedConstant = as_type_ptr(dequantization.multiplyConstant);
+ std::shared_ptr newMultiplyAfterConst = std::make_shared(
+ reducedConstant->get_output_element_type(0),
+ Shape{ 1 },
+ reducedConstant->cast_vector()[0]);
+ auto inputs = convolutionBackpropData->input_values();
+ inputs[0] = dequantization.multiply->input_value(0);
+ const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
+
+ const auto relaxedConvolutionBackpropData = std::make_shared>(
+ *as_type_ptr(copyNode),
+ std::vector{deqPrecision, deqPrecision},
+ std::vector{deqPrecision});
+
+ const auto newMultiplyAfter = std::make_shared>(
+ std::vector{ deqPrecision, deqPrecision },
+ std::vector{ dequantization.multiply->get_output_element_type(0) },
+ ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
+ ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
+
+ replace_node(convolutionBackpropData, newMultiplyAfter);
+ convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
+ inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
+ if (is_type(convolutionBackpropData->get_input_node_ptr(0))) {
+ auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
+ replace_node(convolutionBackpropData, newConvolution);
+ convolutionBackpropData = newConvolution;
+ }
+ }
+
+ {
+ decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
+
+ dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
+
+ if (is_type(dequantization.data.get_node())) {
+ const std::shared_ptr fq = as_type_ptr(dequantization.data.get_node_shared_ptr());
+ std::shared_ptr newFQ = NetworkHelper::fold_fake_quantize(fq, true);
+ NetworkHelper::copyInfo(fq, newFQ);
+ replace_node(fq, newFQ);
+ }
+
+ std::shared_ptr multiplyFromWeights = as_type_ptr(
+ convolutionBackpropData->input_value(1).get_node_shared_ptr());
+ std::shared_ptr subtractFromWeights = as_type_ptr(multiplyFromWeights->get_input_node_shared_ptr(0));
+
+ {
+ Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
+ auto inputs = convolutionBackpropData->input_values();
+ inputs[1] = multiplyFromWeights->input_value(0);
+ auto newMultiplyAfter = std::make_shared(
+ convolutionBackpropData->copy_with_new_inputs(inputs),
+ foldConvert(
+ fold_reshape(
+ multiplyFromWeights->input_value(1),
+ std::make_shared(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
+ false),
+ convolutionBackpropData->get_output_element_type(0)));
+ replace_node(convolutionBackpropData, newMultiplyAfter);
+ convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
+ }
+
+ if (subtractFromWeights != nullptr) {
+ // optimize zero point on weights
+ auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
+ if (optimizedSubtract == nullptr) {
+ subtractFromWeights = nullptr;
+ } else {
+ subtractFromWeights = as_type_ptr(optimizedSubtract);
+
+ const Shape weightsShape = subtractFromWeights->input(0).get_shape();
+ Shape zeroPointShape(weightsShape.size(), 1ul);
+ zeroPointShape[1] = weightsShape[1];
+
+ auto zeroPointConstant = fold(
+ subtractFromWeights->get_input_node_shared_ptr(1),
+ std::make_shared(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
+ replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
+ }
+ }
+
+ std::shared_ptr convertFromWeights =
+ as_type_ptr(
+ subtractFromWeights == nullptr ?
+ multiplyFromWeights->get_input_node_shared_ptr(0) :
+ subtractFromWeights->get_input_node_shared_ptr(0));
+ if (convertFromWeights != nullptr) {
+ auto inputs = convolutionBackpropData->input_values();
+ inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
+ // remove Convert on weights
+ auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
+ replace_node(convolutionBackpropData, newConvolution);
+ convolutionBackpropData = newConvolution;
+ }
+ }
+ std::shared_ptr finalDequantization = NetworkHelper::optimizeMultipliesAfter(
+ convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
+ ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
+ updateOutput(context, finalDequantization, convolutionBackpropData);
+
+ auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
+ if (is_type(onWeights)) {
+ onWeights = onWeights->get_input_node_shared_ptr(0);
+ }
+
+ if (is_type(onWeights)) {
+ auto& rt = onWeights->get_rt_info();
+ rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared>("");
+ }
+
+ return true;
+}
+
+bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const {
+ if (deconvolutionSpecificChannelsRatio) {
+ size_t inputChannels = op->get_input_shape(0)[1];
+ size_t outputChannels = op->get_output_shape(0)[1];
+ if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
+ return false;
+ }
+ }
+
+ return canConvolutionBeTransformed(context, op);
+}
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
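
With `deconvolutionSpecificChannelsRatio` enabled, the new transformation only attempts to quantize deconvolutions whose input channel count is a multiple of 4 and whose output channel count is a multiple of 16; other cases fall through to the weight-folding branch and stay in the original precision. A standalone illustration of that gate (hypothetical channel counts; the real code reads them from the node's shapes):

```cpp
#include <cstddef>
#include <iostream>

// Mirrors the channels-ratio gate used by ConvolutionBackpropDataTransformation
// when deconvolutionSpecificChannelsRatio is enabled.
static bool channelsRatioSupported(std::size_t inputChannels, std::size_t outputChannels) {
    return (inputChannels % 4 == 0) && (outputChannels % 16 == 0);
}

int main() {
    std::cout << channelsRatioSupported(32, 64) << "\n";  // 1: quantization is attempted
    std::cout << channelsRatioSupported(32, 24) << "\n";  // 0: 24 % 16 != 0, kept in original precision
}
```
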
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
index 41b9851d5e3ff9..53fe2702984909 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
@@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root());
- if (!NetworkHelper::isQuantizeSupported(layer)) {
+ if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
return false;
}
@@ -149,7 +149,9 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis
inputHighConst_f32 = fq::updateShape(fold(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
} else if (is_type(eltwise) && checkElementwise(eltwise)) {
if (is_type(fq::getData(eltwise)) ||
- is_type(fq::getData(eltwise))) {
+ is_type(fq::getData(eltwise)) ||
+ is_type(fq::getData(eltwise)) ||
+ is_type(fq::getData(eltwise))) {
return nullptr;
}
diff --git a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
index c1b7f4e907b6a8..734d9abec435ec 100644
--- a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
@@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
+ const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+ const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
auto newFakeQuantize = std::make_shared>(
opset1::FakeQuantize(
fakeQuantizeParent->output(parentIndex),
- foldConvert(fakeQuantize->input_value(1), deqPrecision),
- foldConvert(fakeQuantize->input_value(2), deqPrecision),
+ inputLow,
+ inputHigh,
outputLowConst_f32,
outputHighConst_f32,
fakeQuantize->get_levels()),
diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
index 2e3f2e23d3f428..8d8d9968802e44 100644
--- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
@@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
+ const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+ const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+ NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
auto newFakeQuantize = std::make_shared>(
opset1::FakeQuantize(
fakeQuantizeParent->output(parentIndex),
- foldConvert(fakeQuantize->input_value(1), deqPrecision),
- foldConvert(fakeQuantize->input_value(2), deqPrecision),
+ inputLow,
+ inputHigh,
outputLowConst_f32,
outputHighConst_f32,
fakeQuantize->get_levels()),
@@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
for (const auto& target : children) {
const auto convolution = is_type(target.get_node());
const auto groupConvolution = is_type(target.get_node());
- if (convolution || groupConvolution) {
+ const auto convolutionBackpropData = is_type(target.get_node());
+ if (convolution || groupConvolution || convolutionBackpropData) {
return false;
}
}
diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
index 834aa6931c5a61..0fc0a9dc4fc52d 100644
--- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
@@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
precisionsOnWeights(params.precisionsOnWeights),
deqPrecision(params.deqPrecision),
support3DTensorOnActivations(params.support3DTensorOnActivations),
+ deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
quantizationIntervalAsymmetryThreshold(0.002f),
zeroThreshold(1.e-6f),
minQuantizationLevels(2ul),
diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
index dbca7606e7322a..4a1e942e5753ba 100644
--- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp
+++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
@@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr& op) {
return is_type(node) ||
is_type(node) ||
is_type(node) ||
- is_type(node);
+ is_type(node) ||
+ is_type(node);
};
if (isNotConstantPathOperation(op)) {
@@ -440,8 +441,11 @@ std::shared_ptr NetworkHelper::fold_fake_quantize(const std::shared_ptr NetworkHelper::fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues) {
- return foldFakeQuantize(fq, roundValues, true);
+std::shared_ptr NetworkHelper::fold_fake_quantize(
+ const std::shared_ptr& fq,
+ const bool roundValues,
+ const int outChannelsShapeIndex) {
+ return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
}
FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace) {
@@ -591,7 +595,8 @@ std::shared_ptr NetworkHelper::fuseConvert(const std::shar
std::shared_ptr NetworkHelper::foldFakeQuantize(
const std::shared_ptr& fq,
const bool roundValuesArg,
- const bool roundValuesWasSet) {
+ const bool roundValuesWasSet,
+ const int outChannelsShapeIndex) {
if (is_type(fq->get_input_node_shared_ptr(0)) &&
is_type(fq->get_input_node_shared_ptr(1)) &&
is_type(fq->get_input_node_shared_ptr(2)) &&
@@ -630,10 +635,20 @@ std::shared_ptr NetworkHelper::foldFakeQuantize(
if (constShape.empty() || constShape.size() > 5lu) {
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
}
+ if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
+ THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
+ }
- // OIDHW
- const size_t OC = constShape[0];
- const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
+ size_t OC;
+ size_t IC;
+ // OIDHW or IODHW
+ if (constShape.size() == 1) {
+ OC = constShape[0];
+ IC = 1;
+ } else {
+ OC = constShape[outChannelsShapeIndex];
+ IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
+ }
const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
@@ -667,29 +682,35 @@ std::shared_ptr NetworkHelper::foldFakeQuantize(
auto levels_1 = fq->get_levels() - 1.f;
- //const size_t DHW = D * H * W;
+ const size_t DHW = D * H * W;
const size_t IDHW = IC * D * H * W;
const auto values = constant->cast_vector();
std::vector quantizedValues(OC * IC * D * H * W);
for (size_t oc = 0; oc < OC; ++oc) {
- for (size_t iidx = 0; iidx < IDHW; ++iidx) {
- const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
- const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
- const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
- const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
-
- const size_t idx = oc * IDHW + iidx;
-
- if (values[idx] <= inputLow) {
- quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
- } else if (values[idx] > inputHigh) {
- quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
- } else {
- const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
- levels_1 * (outputHigh - outputLow) + outputLow;
- quantizedValues[idx] = roundValues ? std::roundf(value) : value;
+ const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
+ const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
+ const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
+ const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
+ for (size_t ic = 0; ic < IC; ++ic) {
+ for (size_t iidx = 0; iidx < DHW; ++iidx) {
+ size_t idx;
+ if (outChannelsShapeIndex == 0) {
+ idx = oc * IDHW + ic * DHW + iidx;
+ } else {
+ idx = ic * IDHW + oc * DHW + iidx;
+ }
+
+ if (values[idx] <= inputLow) {
+ quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
+ } else if (values[idx] > inputHigh) {
+ quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
+ } else {
+ const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
+ levels_1 * (outputHigh - outputLow) + outputLow;
+ quantizedValues[idx] = roundValues ? std::roundf(value) : value;
+ }
}
}
}
@@ -818,7 +839,8 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos
const float max,
const bool hasZeroPoint,
const bool updatePrecision,
- const element::Type deqPrecision) {
+ const element::Type deqPrecision,
+ const size_t outChannelsShapeIndex) {
using std::make_shared;
const auto outputLow = fq->input_value(3);
@@ -898,7 +920,8 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos
newMax->output(0),
fq->get_levels(),
fq->get_auto_broadcast()),
- true);
+ true,
+ outChannelsShapeIndex);
NetworkHelper::copyInfo(fq, newFQ);
std::shared_ptr convert2;
@@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data
if (is_type(node)) {
const auto parent = node->get_input_node_shared_ptr(0);
const auto intNode = is_type(parent) ? parent : node;
- const auto intType = intNode->get_input_element_type(0);
- if (intType == element::u8 || intType == element::i8) {
- min = DataPrecision::getMinValue(intType, 256) - 0.5f;
- max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
+ const auto type = intNode->get_input_element_type(0);
+ if (type == element::u8 || type == element::i8) {
+ min = DataPrecision::getMinValue(type, 256) - 0.5f;
+ max = DataPrecision::getMaxValue(type, 256) + 0.5f;
} else {
- return false;
+ return type == element::f32 || type == element::f16;
}
auto subtract1input = node->get_input_node_shared_ptr(1);
if (is_type(subtract1input)) {
@@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data
return true;
}
+std::vector NetworkHelper::precisionIntersection(
+ const std::vector& v1,
+ const std::vector& v2) noexcept {
+ std::vector v3;
+
+ auto v1Copy = v1;
+ auto v2Copy = v2;
+
+ std::sort(v1Copy.begin(), v1Copy.end());
+ std::sort(v2Copy.begin(), v2Copy.end());
+
+ std::set_intersection(v1Copy.begin(), v1Copy.end(),
+ v2Copy.begin(), v2Copy.end(),
+ std::back_inserter(v3));
+ return v3;
+}
+
} // namespace low_precision
} // namespace pass
} // namespace ngraph
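
For reference, the element-wise arithmetic inside `foldFakeQuantize` (unchanged by this patch apart from the channel indexing) is the usual FakeQuantize mapping. A minimal scalar version with one worked value:

```cpp
#include <cmath>
#include <iostream>

// Per-element FakeQuantize, matching the formula used in foldFakeQuantize above.
static float fakeQuantize(float x, float inLow, float inHigh,
                          float outLow, float outHigh, float levels) {
    if (x <= inLow)  return outLow;
    if (x >  inHigh) return outHigh;
    const float levels_1 = levels - 1.0f;
    return std::roundf((x - inLow) / (inHigh - inLow) * levels_1) /
           levels_1 * (outHigh - outLow) + outLow;
}

int main() {
    // Map 0.30 from [0, 1] onto 256 levels over [0, 255]:
    // round(0.30 * 255) = round(76.5) = 77, so the result is 77.
    std::cout << fakeQuantize(0.30f, 0.f, 1.f, 0.f, 255.f, 256.f) << "\n";
}
```
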
diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp
index d8b484bcbcebc1..4debb5868b6d96 100644
--- a/inference-engine/src/low_precision_transformations/src/transformer.cpp
+++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp
@@ -34,6 +34,7 @@
#include "low_precision/avg_pool.hpp"
#include "low_precision/clamp.hpp"
#include "low_precision/convolution.hpp"
+#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/depth_to_space.hpp"
#include "low_precision/fake_quantize.hpp"
#include "low_precision/group_convolution.hpp"
@@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
add(params).
add(params).
add(params).
+ add(params).
add(params).
add(params).
add(params).
@@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
make_matcher_type_relaxed(this);
make_matcher_type_relaxed(this);
make_matcher_type_relaxed(this);
+ make_matcher_type_relaxed(this);
make_matcher_type_relaxed(this);
make_matcher_type_relaxed(this);
make_matcher_type_relaxed(this);
@@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) {
network->validate_nodes_and_infer_types();
}
-std::vector LowPrecisionTransformer::precisionIntersection(
- const std::vector