diff --git a/cmake/developer_package/download/dependency_solver.cmake b/cmake/developer_package/download/dependency_solver.cmake index 9038f610035860..2f2ab192d3e82d 100644 --- a/cmake/developer_package/download/dependency_solver.cmake +++ b/cmake/developer_package/download/dependency_solver.cmake @@ -176,9 +176,9 @@ function(reset_deps_cache) foreach(var_name IN LISTS ARGN) unset(${var_name} CACHE) endforeach() - # foreach(var_name IN LISTS ARGN) - # unset(ENV{${var_name}}) - # endforeach() + foreach(var_name IN LISTS ARGN) + unset(ENV{${var_name}}) + endforeach() endif() endfunction() diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md index 7164c2cdf555c8..f04bdf3a21f8c0 100644 --- a/docs/install_guides/pypi-openvino-dev.md +++ b/docs/install_guides/pypi-openvino-dev.md @@ -1,7 +1,7 @@ # Intel® Distribution of OpenVINO™ Toolkit Developer Package - +Copyright © 2018-2021 Intel Corporation > **LEGAL NOTICE**: Your use of this software and any required dependent software (the -“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or +“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details. ## Introduction @@ -40,11 +40,7 @@ The table below lists the supported operating systems and Python* versions requi ## Install the Developer Package -### Step 1. Install External Software Dependencies - -On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications. - -### Step 2. Set Up Python Virtual Environment +### Step 1. Set Up Python Virtual Environment To avoid dependency conflicts, use a virtual environment. Skip this step only if you do want to install all dependencies globally. @@ -62,7 +58,7 @@ On Windows: python -m venv openvino_env ``` -### Step 3. Activate Virtual Environment +### Step 2. Activate Virtual Environment On Linux and macOS: ```sh @@ -73,14 +69,14 @@ On Windows: openvino_env\Scripts\activate ``` -### Step 4. Set Up and Update pip to the Highest Version +### Step 3. Set Up and Update PIP to the Highest Version Run the command below: ```sh python -m pip install --upgrade pip ``` -### Step 5. Install the Package +### Step 4. Install the Package Run the command below:
@@ -88,7 +84,7 @@ Run the command below:
pip install openvino-dev ``` -### Step 6. Verify that the Package is Installed +### Step 5. Verify that the Package is Installed Run the command below (this may take a few seconds): ```sh @@ -97,6 +93,19 @@ pot -h You will see the help message for Post-Training Optimization Tool if installation finished successfully. +## Troubleshooting + +#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio" + +On Windows* some dependencies may require compilation from source when installing. To resolve this issue, you need to install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat package installation. + +#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory + +To resolve missing external dependency on Ubuntu*, execute the following command: +```sh +sudo apt-get install libpython3.7 +``` + ## Additional Resources - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) diff --git a/docs/install_guides/pypi-openvino-rt.md b/docs/install_guides/pypi-openvino-rt.md index cfe95281fdfc30..6e22d74157cd28 100644 --- a/docs/install_guides/pypi-openvino-rt.md +++ b/docs/install_guides/pypi-openvino-rt.md @@ -1,7 +1,7 @@ # Intel® Distribution of OpenVINO™ Toolkit Runtime Package - +Copyright © 2018-2021 Intel Corporation > **LEGAL NOTICE**: Your use of this software and any required dependent software (the -“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or +“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details. ## Introduction @@ -37,11 +37,7 @@ The table below lists supported operating systems and Python* versions required ## Install the Runtime Package -### Step 1. Install External Software Dependencies - -On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications. - -### Step 2. Set Up Python Virtual Environment +### Step 1. Set Up Python Virtual Environment To avoid dependency conflicts, use a virtual environment. Skip this step only if you do want to install all dependencies globally. @@ -55,7 +51,7 @@ python -m venv openvino_env > **NOTE**: On Linux and macOS, you may need to type `python3` instead of `python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/). -### Step 3. Activate Virtual Environment +### Step 2. Activate Virtual Environment On Linux and macOS: ```sh @@ -66,14 +62,14 @@ On Windows: openvino_env\Scripts\activate ``` -### Step 4. Set Up and Update pip to the Highest Version +### Step 3. 
Set Up and Update PIP to the Highest Version Run the command below: ```sh python -m pip install --upgrade pip ``` -### Step 5. Install the Package +### Step 4. Install the Package Run the command below:
@@ -81,7 +77,7 @@ Run the command below:
pip install openvino ``` -### Step 6. Verify that the Package is Installed +### Step 5. Verify that the Package is Installed Run the command below: ```sh @@ -90,6 +86,19 @@ python -c "from openvino.inference_engine import IECore" You will not see any error messages if installation finished successfully. +## Troubleshooting + +#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio" + +On Windows* some dependencies may require compilation from source when installing. To resolve this issue, you need to install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat package installation. + +#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory + +To resolve missing external dependency on Ubuntu*, execute the following command: +```sh +sudo apt-get install libpython3.7 +``` + ## Additional Resources - [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit). diff --git a/docs/ops/normalization/BatchNormInference_1.md b/docs/ops/normalization/BatchNormInference_1.md index 218111575bd91d..694a9989e9f0fb 100644 --- a/docs/ops/normalization/BatchNormInference_1.md +++ b/docs/ops/normalization/BatchNormInference_1.md @@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values * *epsilon* * **Description**: *epsilon* is a constant added to the variance for numerical stability. - * **Range of values**: a positive floating-point number + * **Range of values**: a floating-point number greater than or equal to zero * **Type**: `float` * **Default value**: none * **Required**: *yes* diff --git a/docs/ops/normalization/BatchNormInference_5.md b/docs/ops/normalization/BatchNormInference_5.md index cec26e4b2ecf16..f5019d08b2d37e 100644 --- a/docs/ops/normalization/BatchNormInference_5.md +++ b/docs/ops/normalization/BatchNormInference_5.md @@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values * *epsilon* * **Description**: *epsilon* is a constant added to the variance for numerical stability. 
- * **Range of values**: a positive floating-point number + * **Range of values**: a floating-point number greater than or equal to zero * **Type**: `float` * **Default value**: none * **Required**: *yes* diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index 0a0770f06ecfb0..4ce1ef3136550e 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -261,8 +261,8 @@ if (ENABLE_GNA) set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452") endif() if(GNA_LIBRARY_VERSION STREQUAL "GNA2") - set(GNA_VERSION "02.00.00.1191.0") - set(GNA_HASH "a61b4a9133549b0a9f0b46d069f72906ced28bcbbe7d5c361e687645f53a1c8b") + set(GNA_VERSION "02.00.00.1226") + set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6") endif() set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include) diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index 958ea9b23a74ca..a4960ce3430d8d 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -25,9 +25,9 @@ function(set_ie_threading_interface_for TARGET_NAME) else() find_dependency(TBB COMPONENTS tbb tbbmalloc) endif() - set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE) - set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) - set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE) + set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE) + set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) + set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if (NOT TBB_FOUND) ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\ SEQ method will be used.") diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie_blob.h index dbe264c054de7f..db7c29c950877c 100644 --- a/inference-engine/include/ie_blob.h +++ b/inference-engine/include/ie_blob.h @@ -799,6 +799,7 @@ class TBlob : public MemoryBlob { } }; +#ifdef __clang__ extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); @@ -813,6 +814,7 @@ extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob); +#endif // __clang__ /** * @brief Creates a blob with the given tensor descriptor. 
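For readers unfamiliar with the extern-template pattern the ie_blob.h / ie_common.cpp hunks rely on: the header declares the instantiations (here only under `__clang__`) so that including translation units do not instantiate them implicitly, while the .cpp keeps the single exported definition. Below is a minimal, self-contained sketch of that pattern; `MyBlob` and `MY_API` are hypothetical stand-ins for `TBlob` and `INFERENCE_ENGINE_API_CLASS`, not the actual OpenVINO declarations.
```cpp
// Hypothetical stand-ins: MyBlob for TBlob, MY_API for INFERENCE_ENGINE_API_CLASS.
// In a real build MY_API would expand to dllexport/visibility attributes.
#define MY_API(cls) cls

template <typename T>
class MyBlob {
public:
    T value{};
};

#ifdef __clang__
// Header side: tell clang the instantiation lives elsewhere, so including
// translation units neither instantiate it implicitly nor clash with the
// exported definition below.
extern template class MY_API(MyBlob<float>);
#endif  // __clang__

// Source side (the ie_common.cpp analogue): the one exported instantiation.
template class MY_API(MyBlob<float>);

int main() {
    MyBlob<float> blob;  // uses the explicit instantiation above
    return static_cast<int>(blob.value);
}
```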
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 0bea81efacea19..4aa53beb1e5a86 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc .add(LayerTransformation::Params(params) .setSupportAsymmetricQuantization(false) .setSupport3DTensorOnActivations(false)) + .add(LayerTransformation::Params(params) + .setSupportAsymmetricQuantization(false) + .setDeconvolutionSpecificChannelsRatio(true)) // INT8 StridedSlice not supported .remove()); diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp index 728efcdd684687..f6bc926a35ada6 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp @@ -1784,7 +1784,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet || (component[i - 1].operation == kDnnConvolutional1dOp) || (component[i - 1].operation == kDnnConvolutional2dOp) || ((component[i - 1].operation == kDnnMaxPoolOp) && - (component[i - 2].operation == kDnnConvolutional1dOp))) { + (component[i - 2].operation == kDnnConvolutional1dOp || component[i - 2].operation == kDnnConvolutional2dOp))) { if (gnaOperation->Operands[PwlOpIdx] == nullptr) { HelperGna2OperationSetOperand(gnaOperation, gnaUserAllocator, gnaUserFree, PwlOpIdx, createGna2TensorPwl(1, nullptr)); } diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp index 98257eb3687939..cef6e26537a29c 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp @@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const { } std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const { - return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w); + return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w); } RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) { diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp index 01e0d7e80befb9..cbfc47f57aab0b 100644 --- a/inference-engine/src/gna_plugin/gna_device.cpp +++ b/inference-engine/src/gna_plugin/gna_device.cpp @@ -156,24 +156,42 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) { } bool GNADeviceHelper::enforceLegacyCnnNeeded() const { - auto devVersion = getExecutionTargetDevice(); - return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(devVersion); + const auto compileTargetDevice = getTargetDevice(false); + return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(compileTargetDevice); } -Gna2DeviceVersion GNADeviceHelper::getExecutionTargetDevice() const { +namespace { const volatile auto Gna2DeviceVersion3_0 = static_cast(0x30); - if (executionTarget.empty()) { - if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) - return isGnaLibVersion2_1 ? 
Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0; - return detectedGnaDevVersion; - } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { +} // namespace + +Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const bool execTarget) const { + auto parsed = Gna2DeviceVersion2_0; + auto throwUnsupportedGnaTarget = [&](std::string extraSuffix) { + auto key = execTarget ? InferenceEngine::GNAConfigParams::KEY_GNA_EXEC_TARGET : InferenceEngine::GNAConfigParams::KEY_GNA_COMPILE_TARGET; + THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix; + }; + if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { if (!isGnaLibVersion2_1) - THROW_GNA_EXCEPTION << "Unsupported GNA execution target " << executionTarget << " when GNA Library version is 2.0.X.Y"; - return Gna2DeviceVersion3_0; - } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { - return Gna2DeviceVersion2_0; + throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y"); + parsed = Gna2DeviceVersion3_0; + } else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { + throwUnsupportedGnaTarget(""); } - THROW_GNA_EXCEPTION << "Unknown execution target: \"" << executionTarget << "\""; + return parsed; +} + +Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const { + if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) + return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0; + return detectedGnaDevVersion; +} + +Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const { + const auto declared = execTarget ? executionTarget : compileTarget; + if (declared.empty()) { + return execTarget ? getDefaultTarget() : getTargetDevice(true); + } + return parseDeclaredTarget(declared, execTarget); } uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) { @@ -186,7 +204,7 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) { // (bit exactly) as on the selected GNA execution target generation. // See the GNA Plugin's GNA_EXEC_TARGET config option description. 
if (swExactMode) { - const auto consistentDevice = getExecutionTargetDevice(); + const auto consistentDevice = getTargetDevice(true); status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice); checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast(consistentDevice)) + ")"); } diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp index 831b9fde517352..e032e5532dafc3 100644 --- a/inference-engine/src/gna_plugin/gna_device.hpp +++ b/inference-engine/src/gna_plugin/gna_device.hpp @@ -145,7 +145,6 @@ class GNADeviceHelper { return dev <= Gna2DeviceVersion2_0 && isGnaHw(dev); } bool enforceLegacyCnnNeeded() const; - Gna2DeviceVersion getExecutionTargetDevice() const; static void checkGna2Status(Gna2Status status, const std::string& from); static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel); #endif @@ -197,6 +196,9 @@ class GNADeviceHelper { static const std::map , const std::string > operandTypes; static void enforceLegacyCnns(Gna2Model& gnaModel); + Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const; + Gna2DeviceVersion getDefaultTarget() const; + Gna2DeviceVersion getTargetDevice(bool execTarget) const; #endif void setOMPThreads(uint8_t const n_threads); diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index b085dcef7f46c3..2dcac40afc6a65 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -1027,13 +1027,8 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) { auto layerInfo = LayerInfo(concatParent); // auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock()); if (layerInfo.isInput()) { - auto & bytesAllocated = inputDesc->bytes_allocated_for_input[((InferenceEngine::CNNLayerPtr)layerInfo)->name]; - connectInput(layer, &concatLayerInfo.gna_ptr, - concatLayerInfo.reserved_size, inputLayer.offset, idx, false); - - // TODO: currently connectInput api accept only total size, for concat we need extension for allocated, and actual sizes - bytesAllocated = inputLayer.tensorSize; + inputLayer.tensorSize, inputLayer.offset, idx, false); concatLayerInfo.input_allocated = true; } else if (layerInfo.isMemory()) { diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 9832d59d527a70..1b1019767f242f 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include "transformations/remove_extra_reshapes.hpp" @@ -662,6 +663,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 52bc0d1d43fe63..35c9d2206a642e 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -1189,7 +1189,7 @@ void InsertConcatAligningFilterPass::run() { getCreatorLayer(outData) = filterWithQuant; 
filterWithQuant->outData.push_back(outData); - CNNNetworkInsertLayer(prevLayer, l, filterWithQuant); + CNNNetworkInsertLayer(prevLayer, l, filterWithQuant, invalid_data_idx, input_idx); } offset += outputSize; } diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 99dfa1b64010f2..68e0f131721af4 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -201,7 +201,6 @@ if(WIN32) endif() target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} - inference_engine_snippets inference_engine_transformations pugixml) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) diff --git a/inference-engine/src/inference_engine/ie_common.cpp b/inference-engine/src/inference_engine/ie_common.cpp index c10c7a6c7bc3e4..effee536d59993 100644 --- a/inference-engine/src/inference_engine/ie_common.cpp +++ b/inference-engine/src/inference_engine/ie_common.cpp @@ -124,19 +124,19 @@ TBlob::~TBlob() { free(); } -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; -template class TBlob; +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); +template class INFERENCE_ENGINE_API_CLASS(TBlob); } // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index b03c329ca44cf3..ca65d596e60868 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -40,7 +40,6 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE ${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl - $ $ $ $ @@ -61,7 +60,7 @@ add_library(${TARGET_NAME} SHARED ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "Inference Engine Legacy library") -target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets +target_link_libraries(${TARGET_NAME} PUBLIC inference_engine PRIVATE pugixml openvino::itt ${NGRAPH_LIBRARIES} inference_engine_transformations) diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 0b66531044a62b..6c76ac47e0222a 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -39,7 +39,6 @@ #include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp" -#include 
"snippets/op/subgraph.hpp" #include "exec_graph_info.hpp" #include "caseless.hpp" @@ -1979,15 +1978,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptrparams[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames; } - if (auto subgraph = ::ngraph::as_type_ptr(layer)) { - std::string names = ""; - for (const auto& op : subgraph->get_body()->get_ordered_ops()) { - names += ", " + op->get_friendly_name(); - } - - cnnLayer->params["originalLayersNames"] += names; - } - std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer); if (!primitivesPriority.empty()) { cnnLayer->params["PrimitivesPriority"] = primitivesPriority; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp new file mode 100644 index 00000000000000..d6bbe504dc6eea --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "weightable_layer_transformation.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation { +public: + ConvolutionBackpropDataTransformation(const Params& params); + void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; + bool isQuantized(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp index 36b1293cd425b3..06a37ab8b22015 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision { public: DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} + explicit DataPrecision(const element::Type& precision) { + this->precision = precision; + min = getMinValue(precision, 256); + max = getMaxValue(precision, 256); + hasZeroPoint = false; + } + DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) : precision(precision), min(min), @@ -122,29 +129,6 @@ class TRANSFORMATIONS_API DataPrecision { static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) { return signedInterval ? element::i8 : element::u8; } - - static float getMin(const size_t quantizationLevels, const bool signedInterval) { - if (quantizationLevels == 255) { - return signedInterval ? -127.0f : 0.0f; - } else if (quantizationLevels == 256) { - return signedInterval ? -128.0f : 0.0f; - } else { - // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported"; - // FIXME: not completed - return signedInterval ? 
-128.0f : 0.0f; - } - } - - static float getMax(const size_t quantizationLevels, const bool signedInterval) { - if ((quantizationLevels == 255) || (quantizationLevels == 256)) { - return signedInterval ? 127.0f : 255.0f; - } else { - // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported"; - // FIXME: not completed - // return quantizationLevels - 1.0; - return signedInterval ? 127.0f : 255.0f; - } - } }; inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) { @@ -181,7 +165,8 @@ class TRANSFORMATIONS_API LayerTransformation { std::vector precisionsOnActivations = { element::u8, element::i8 }, std::vector precisionsOnWeights = { element::i8 }, element::Type deqPrecision = element::f32, - bool support3DTensorOnActivations = true) : + bool support3DTensorOnActivations = true, + bool deconvolutionSpecificChannelsRatio = false) : updatePrecisions(updatePrecisions), quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), @@ -189,7 +174,8 @@ class TRANSFORMATIONS_API LayerTransformation { precisionsOnActivations(precisionsOnActivations), precisionsOnWeights(precisionsOnWeights), deqPrecision(deqPrecision), - support3DTensorOnActivations(support3DTensorOnActivations) { + support3DTensorOnActivations(support3DTensorOnActivations), + deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) { if (precisionsOnActivations.size() == 0ul) { THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; } @@ -234,6 +220,11 @@ class TRANSFORMATIONS_API LayerTransformation { return *this; } + Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) { + this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio; + return *this; + } + bool updatePrecisions; QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; @@ -242,6 +233,7 @@ class TRANSFORMATIONS_API LayerTransformation { std::vector precisionsOnWeights; element::Type deqPrecision; bool support3DTensorOnActivations; + bool deconvolutionSpecificChannelsRatio; }; class PrecisionDetails { @@ -318,6 +310,7 @@ class TRANSFORMATIONS_API LayerTransformation { std::vector precisionsOnWeights; element::Type deqPrecision; bool support3DTensorOnActivations; + bool deconvolutionSpecificChannelsRatio; // absolute value, used to determine quantization interval asymmetry float quantizationIntervalAsymmetryThreshold; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 9846ef50d6aa2d..8cf52a13fe20ca 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -109,7 +109,8 @@ class TRANSFORMATIONS_API NetworkHelper { const float max, const bool hasZeroPoint, const bool updatePrecision, - const element::Type deqPrecision = element::f32); + const element::Type deqPrecision = element::f32, + const size_t outChannelsShapeIndex = 0); static std::shared_ptr updateFakeQuantize( std::shared_ptr fq, @@ -183,7 +184,7 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr toScalarIfPossible(std::shared_ptr node); static std::shared_ptr 
fold_fake_quantize(const std::shared_ptr& fq); - static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues); + static std::shared_ptr fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues, int outChannelsShapeIndex = 0); static FakeQuantizeDequantization foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace = false); @@ -191,8 +192,16 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr fuseConvert(const std::shared_ptr& fakeQuantize); + static std::vector precisionIntersection( + const std::vector& v1, + const std::vector& v2) noexcept; + private: - static std::shared_ptr foldFakeQuantize(const std::shared_ptr& fq, const bool roundValues, const bool roundValuesWasSet); + static std::shared_ptr foldFakeQuantize( + const std::shared_ptr& fq, + const bool roundValues, + const bool roundValuesWasSet, + int outChannelsShapeIndex = 0); // 1 - on weights // 0 - weightable layer was not found diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp index 7a10d1daeb1b74..8de3fba36d5906 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp @@ -303,10 +303,6 @@ class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILaye std::map>> transformations, GraphRewrite& pass, TransformationContext& context); - - std::vector precisionIntersection( - const std::vector& v1, - const std::vector& v2) const noexcept; }; class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp index 94b81f2b2af785..aeb0a6d9abd576 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp @@ -22,7 +22,7 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; protected: - void decomposeFakeQuantizeForWeightsPath(std::shared_ptr weightableLayer) const; + void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const; static bool isGroup(const std::shared_ptr& node); static bool isDepthwise(const std::shared_ptr& node); diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 85aef194893107..915e87d2f60803 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -42,6 +42,7 @@ std::shared_ptr replaceToSubtract(const std::shared_ptr& const auto parent = add->get_input_node_shared_ptr(dataBranchIndex); if (is_type(parent) || is_type(parent) || + is_type(parent) || (is_type(parent) && (is_type(parent->get_input_node_ptr(0)) || is_type(parent->get_input_node_ptr(1))))) { return nullptr; diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp 
b/inference-engine/src/low_precision_transformations/src/concat.cpp index 24cc5940c1bb1f..4988e29b1e289a 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat return false; } - DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); - if (dataPrecision.precision == ngraph::element::undefined) { + std::vector concatParentsChildrensPrecisions = precisionsOnActivations; + fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions); + if (concatParentsChildrensPrecisions.empty()) { return false; } - std::unordered_map dequantizations; for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - const std::shared_ptr fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); + fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); if (fq == nullptr) { return false; } @@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat if (quantizationDetails.inputHighValues.size() != 1ul) { return false; } + std::vector fqChildrensPrecisions = precisionsOnActivations; + fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions); + concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions); - const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); - if (dataPrecision2.precision == ngraph::element::undefined) { + if (concatParentsChildrensPrecisions.empty()) { return false; } - - if (dataPrecision.precision != dataPrecision2.precision) { - // quantization levels are the same, difference can be in sign - // wider interval (precision) is preferable: use signed if least one interval is signed - dataPrecision = dataPrecision.precision.is_signed() ? 
dataPrecision : dataPrecision2; - } } - if (dataPrecision.precision == ngraph::element::undefined) { - return false; + DataPrecision dataPrecision; + if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) { + dataPrecision = DataPrecision(element::i8); + } else { + dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]); } std::vector quantizationLayersDetails; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index 62d958d22b4037..dc81d51cd717de 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector& concat : concatLayers) { const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(concat); for (const std::shared_ptr& child : children) { - if (is_type(child.get())) { + if ((is_type(child.get()) || + is_type(child.get())) && + this->layerTransformationsManager->isQuantized(child)) { return false; } } diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp index ff5ca944df5796..6496ee4ee54eab 100644 --- a/inference-engine/src/low_precision_transformations/src/convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp @@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph auto convolution = m.get_match_root(); if (!canConvolutionBeTransformed(context, convolution)) { - return false; + auto weightInput = convolution->get_input_node_shared_ptr(1); + std::shared_ptr reshapeFromWeights = as_type_ptr(weightInput); + FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ? 
+ NetworkHelper::getDequantization(convolution, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + if (dequantization.empty()) { + const auto fqOnWeights = getFakeQuantizeOnWeights(convolution); + std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); + if (reshapeFromWeights != nullptr) { + resultConstant = fold_reshape( + resultConstant, + reshapeFromWeights->input_value(1), + false); + } + if (as_type_ptr(resultConstant)) { + replace_node(weightInput, resultConstant); + } + } else { + NetworkHelper::foldDequantization(dequantization.multiply, 0, true); + } + return true; } convolution = NetworkHelper::separateInStandaloneBranch(convolution); diff --git a/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp new file mode 100644 index 00000000000000..a73ee1de155781 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp @@ -0,0 +1,218 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/convolution_backprop_data.hpp" + +#include +#include +#include +#include +#include + +#include "low_precision/network_helper.hpp" +#include "low_precision/common/dequantization_op.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) { +} + +void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { + addPattern( + pass, + context, + make_op_pattern({ make_op_label(), make_op_label() })); + addPattern( + pass, + context, + make_op_pattern({ make_op_label(), make_op_label() })); + addPattern( + pass, + context, + make_op_pattern( + { make_op_label(), make_op_label(), make_op_label() })); + addPattern( + pass, + context, + make_op_pattern( + { make_op_label(), make_op_label(), make_op_label() })); +} + +bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr layer) const noexcept { + if (deconvolutionSpecificChannelsRatio) { + size_t inputChannels = layer->get_input_shape(0)[1]; + size_t outputChannels = layer->get_output_shape(0)[1]; + if (inputChannels % 4 != 0 || outputChannels % 16 != 0) { + return false; + } + } + return WeightableLayerTransformation::isQuantized(layer, false); +} + +bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { + auto convolutionBackpropData = m.get_match_root(); + + if (!canBeTransformed(context, convolutionBackpropData)) { + auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1); + std::shared_ptr reshapeFromWeights = as_type_ptr(weightsInput); + FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ? 
+ NetworkHelper::getDequantization(convolutionBackpropData, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + if (dequantization.empty()) { + const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData); + std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); + if (reshapeFromWeights != nullptr) { + resultConstant = fold_reshape( + resultConstant, + reshapeFromWeights->input_value(1), + false); + } + if (as_type_ptr(resultConstant)) { + replace_node(weightsInput, resultConstant); + } + } else { + NetworkHelper::foldDequantization(dequantization.multiply, 0, true); + } + return true; + } + + convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData); + FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData); + { + if (dequantization.subtract != nullptr) { + std::shared_ptr layer = dequantization.subtract; + ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer); + + NetworkHelper::optimizeSubtract(dequantization.subtract); + } + std::shared_ptr reducedConstant = as_type_ptr(dequantization.multiplyConstant); + std::shared_ptr newMultiplyAfterConst = std::make_shared( + reducedConstant->get_output_element_type(0), + Shape{ 1 }, + reducedConstant->cast_vector()[0]); + auto inputs = convolutionBackpropData->input_values(); + inputs[0] = dequantization.multiply->input_value(0); + const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs); + + const auto relaxedConvolutionBackpropData = std::make_shared>( + *as_type_ptr(copyNode), + std::vector{deqPrecision, deqPrecision}, + std::vector{deqPrecision}); + + const auto newMultiplyAfter = std::make_shared>( + std::vector{ deqPrecision, deqPrecision }, + std::vector{ dequantization.multiply->get_output_element_type(0) }, + ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(), + ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get()); + + replace_node(convolutionBackpropData, newMultiplyAfter); + convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); + inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0); + if (is_type(convolutionBackpropData->get_input_node_ptr(0))) { + auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs); + replace_node(convolutionBackpropData, newConvolution); + convolutionBackpropData = newConvolution; + } + } + + { + decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul); + + dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul); + + if (is_type(dequantization.data.get_node())) { + const std::shared_ptr fq = as_type_ptr(dequantization.data.get_node_shared_ptr()); + std::shared_ptr newFQ = NetworkHelper::fold_fake_quantize(fq, true); + NetworkHelper::copyInfo(fq, newFQ); + replace_node(fq, newFQ); + } + + std::shared_ptr multiplyFromWeights = as_type_ptr( + convolutionBackpropData->input_value(1).get_node_shared_ptr()); + std::shared_ptr subtractFromWeights = as_type_ptr(multiplyFromWeights->get_input_node_shared_ptr(0)); + + { + Shape newScaleShape = multiplyFromWeights->get_input_shape(1); + auto inputs = convolutionBackpropData->input_values(); + inputs[1] = multiplyFromWeights->input_value(0); + auto newMultiplyAfter = std::make_shared( + convolutionBackpropData->copy_with_new_inputs(inputs), + foldConvert( + fold_reshape( + multiplyFromWeights->input_value(1), + std::make_shared(element::u64, 
Shape{ newScaleShape.size() }, newScaleShape), + false), + convolutionBackpropData->get_output_element_type(0))); + replace_node(convolutionBackpropData, newMultiplyAfter); + convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); + } + + if (subtractFromWeights != nullptr) { + // optimize zero point on weights + auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights); + if (optimizedSubtract == nullptr) { + subtractFromWeights = nullptr; + } else { + subtractFromWeights = as_type_ptr(optimizedSubtract); + + const Shape weightsShape = subtractFromWeights->input(0).get_shape(); + Shape zeroPointShape(weightsShape.size(), 1ul); + zeroPointShape[1] = weightsShape[1]; + + auto zeroPointConstant = fold( + subtractFromWeights->get_input_node_shared_ptr(1), + std::make_shared(element::i32, Shape{zeroPointShape.size()}, zeroPointShape)); + replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant); + } + } + + std::shared_ptr convertFromWeights = + as_type_ptr( + subtractFromWeights == nullptr ? + multiplyFromWeights->get_input_node_shared_ptr(0) : + subtractFromWeights->get_input_node_shared_ptr(0)); + if (convertFromWeights != nullptr) { + auto inputs = convolutionBackpropData->input_values(); + inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0); + // remove Convert on weights + auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs); + replace_node(convolutionBackpropData, newConvolution); + convolutionBackpropData = newConvolution; + } + } + std::shared_ptr finalDequantization = NetworkHelper::optimizeMultipliesAfter( + convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this()); + ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization); + updateOutput(context, finalDequantization, convolutionBackpropData); + + auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1); + if (is_type(onWeights)) { + onWeights = onWeights->get_input_node_shared_ptr(0); + } + + if (is_type(onWeights)) { + auto& rt = onWeights->get_rt_info(); + rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared>(""); + } + + return true; +} + +bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { + if (deconvolutionSpecificChannelsRatio) { + size_t inputChannels = op->get_input_shape(0)[1]; + size_t outputChannels = op->get_output_shape(0)[1]; + if (inputChannels % 4 != 0 || outputChannels % 16 != 0) { + return false; + } + } + + return canConvolutionBeTransformed(context, op); +} + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp index 41b9851d5e3ff9..53fe2702984909 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp @@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); - if (!NetworkHelper::isQuantizeSupported(layer)) { + if (!QuantizationDetails::outputLayoutIsSupported(layer)) { return false; } @@ -149,7 +149,9 @@ std::shared_ptr 
FakeQuantizeTransformation::fuseElementwis inputHighConst_f32 = fq::updateShape(fold(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); } else if (is_type(eltwise) && checkElementwise(eltwise)) { if (is_type(fq::getData(eltwise)) || - is_type(fq::getData(eltwise))) { + is_type(fq::getData(eltwise)) || + is_type(fq::getData(eltwise)) || + is_type(fq::getData(eltwise))) { return nullptr; } diff --git a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp index c1b7f4e907b6a8..734d9abec435ec 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp @@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0); const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize); + const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision); + const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32); + auto newFakeQuantize = std::make_shared>( opset1::FakeQuantize( fakeQuantizeParent->output(parentIndex), - foldConvert(fakeQuantize->input_value(1), deqPrecision), - foldConvert(fakeQuantize->input_value(2), deqPrecision), + inputLow, + inputHigh, outputLowConst_f32, outputHighConst_f32, fakeQuantize->get_levels()), diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp index 2e3f2e23d3f428..8d8d9968802e44 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp @@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext& const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0); const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize); + const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision); + const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32); + NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32); + auto newFakeQuantize = std::make_shared>( opset1::FakeQuantize( fakeQuantizeParent->output(parentIndex), - foldConvert(fakeQuantize->input_value(1), deqPrecision), - foldConvert(fakeQuantize->input_value(2), deqPrecision), + inputLow, + inputHigh, outputLowConst_f32, outputHighConst_f32, fakeQuantize->get_levels()), @@ -76,7 +83,8 @@ bool 
FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma for (const auto& target : children) { const auto convolution = is_type(target.get_node()); const auto groupConvolution = is_type(target.get_node()); - if (convolution || groupConvolution) { + const auto convolutionBackpropData = is_type(target.get_node()); + if (convolution || groupConvolution || convolutionBackpropData) { return false; } } diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp index 834aa6931c5a61..0fc0a9dc4fc52d 100644 --- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp @@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) : precisionsOnWeights(params.precisionsOnWeights), deqPrecision(params.deqPrecision), support3DTensorOnActivations(params.support3DTensorOnActivations), + deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio), quantizationIntervalAsymmetryThreshold(0.002f), zeroThreshold(1.e-6f), minQuantizationLevels(2ul), diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index dbca7606e7322a..4a1e942e5753ba 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr& op) { return is_type(node) || is_type(node) || is_type(node) || - is_type(node); + is_type(node) || + is_type(node); }; if (isNotConstantPathOperation(op)) { @@ -440,8 +441,11 @@ std::shared_ptr NetworkHelper::fold_fake_quantize(const std::shared_ptr NetworkHelper::fold_fake_quantize(const std::shared_ptr& fq, const bool roundValues) { - return foldFakeQuantize(fq, roundValues, true); +std::shared_ptr NetworkHelper::fold_fake_quantize( + const std::shared_ptr& fq, + const bool roundValues, + const int outChannelsShapeIndex) { + return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex); } FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr& node, const size_t branchIndex, const bool inPlace) { @@ -591,7 +595,8 @@ std::shared_ptr NetworkHelper::fuseConvert(const std::shar std::shared_ptr NetworkHelper::foldFakeQuantize( const std::shared_ptr& fq, const bool roundValuesArg, - const bool roundValuesWasSet) { + const bool roundValuesWasSet, + const int outChannelsShapeIndex) { if (is_type(fq->get_input_node_shared_ptr(0)) && is_type(fq->get_input_node_shared_ptr(1)) && is_type(fq->get_input_node_shared_ptr(2)) && @@ -630,10 +635,20 @@ std::shared_ptr NetworkHelper::foldFakeQuantize( if (constShape.empty() || constShape.size() > 5lu) { THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size(); } + if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) { + THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex; + } - // OIDHW - const size_t OC = constShape[0]; - const size_t IC = constShape.size() > 1lu ? constShape[1] : 1; + size_t OC; + size_t IC; + // OIDHW or IODHW + if (constShape.size() == 1) { + OC = constShape[0]; + IC = 1; + } else { + OC = constShape[outChannelsShapeIndex]; + IC = constShape[outChannelsShapeIndex == 0 ? 
1 : 0]; + } const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1; const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1; const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1; @@ -667,29 +682,35 @@ std::shared_ptr NetworkHelper::foldFakeQuantize( auto levels_1 = fq->get_levels() - 1.f; - //const size_t DHW = D * H * W; + const size_t DHW = D * H * W; const size_t IDHW = IC * D * H * W; const auto values = constant->cast_vector(); std::vector quantizedValues(OC * IC * D * H * W); for (size_t oc = 0; oc < OC; ++oc) { - for (size_t iidx = 0; iidx < IDHW; ++iidx) { - const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc]; - const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc]; - const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc]; - const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc]; - - const size_t idx = oc * IDHW + iidx; - - if (values[idx] <= inputLow) { - quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow; - } else if (values[idx] > inputHigh) { - quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh; - } else { - const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) / - levels_1 * (outputHigh - outputLow) + outputLow; - quantizedValues[idx] = roundValues ? std::roundf(value) : value; + const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc]; + const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc]; + const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc]; + const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc]; + for (size_t ic = 0; ic < IC; ++ic) { + for (size_t iidx = 0; iidx < DHW; ++iidx) { + size_t idx; + if (outChannelsShapeIndex == 0) { + idx = oc * IDHW + ic * DHW + iidx; + } else { + idx = ic * IDHW + oc * DHW + iidx; + } + + if (values[idx] <= inputLow) { + quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow; + } else if (values[idx] > inputHigh) { + quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh; + } else { + const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) / + levels_1 * (outputHigh - outputLow) + outputLow; + quantizedValues[idx] = roundValues ? std::roundf(value) : value; + } } } } @@ -818,7 +839,8 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos const float max, const bool hasZeroPoint, const bool updatePrecision, - const element::Type deqPrecision) { + const element::Type deqPrecision, + const size_t outChannelsShapeIndex) { using std::make_shared; const auto outputLow = fq->input_value(3); @@ -898,7 +920,8 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos newMax->output(0), fq->get_levels(), fq->get_auto_broadcast()), - true); + true, + outChannelsShapeIndex); NetworkHelper::copyInfo(fq, newFQ); std::shared_ptr convert2; @@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data if (is_type(node)) { const auto parent = node->get_input_node_shared_ptr(0); const auto intNode = is_type(parent) ? 
parent : node; - const auto intType = intNode->get_input_element_type(0); - if (intType == element::u8 || intType == element::i8) { - min = DataPrecision::getMinValue(intType, 256) - 0.5f; - max = DataPrecision::getMaxValue(intType, 256) + 0.5f; + const auto type = intNode->get_input_element_type(0); + if (type == element::u8 || type == element::i8) { + min = DataPrecision::getMinValue(type, 256) - 0.5f; + max = DataPrecision::getMaxValue(type, 256) + 0.5f; } else { - return false; + return type == element::f32 || type == element::f16; } auto subtract1input = node->get_input_node_shared_ptr(1); if (is_type(subtract1input)) { @@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data return true; } +std::vector NetworkHelper::precisionIntersection( + const std::vector& v1, + const std::vector& v2) noexcept { + std::vector v3; + + auto v1Copy = v1; + auto v2Copy = v2; + + std::sort(v1Copy.begin(), v1Copy.end()); + std::sort(v2Copy.begin(), v2Copy.end()); + + std::set_intersection(v1Copy.begin(), v1Copy.end(), + v2Copy.begin(), v2Copy.end(), + std::back_inserter(v3)); + return v3; +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index d8b484bcbcebc1..4debb5868b6d96 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -34,6 +34,7 @@ #include "low_precision/avg_pool.hpp" #include "low_precision/clamp.hpp" #include "low_precision/convolution.hpp" +#include "low_precision/convolution_backprop_data.hpp" #include "low_precision/depth_to_space.hpp" #include "low_precision/fake_quantize.hpp" #include "low_precision/group_convolution.hpp" @@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const add(params). add(params). add(params). + add(params). add(params). add(params). add(params). 
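Reviewer note (not part of the patch): precisionIntersection moves from LowPrecisionTransformer into NetworkHelper so other code can reuse it, and getPrecisionsOnActivations below now calls it through NetworkHelper::. A minimal sketch of what the helper computes, assuming the low_precision/network_helper.hpp include path used elsewhere in this component; the function name narrowPrecisions and its arguments are illustrative only:

#include <vector>
#include "low_precision/network_helper.hpp"

using ngraph::element::Type;
using ngraph::pass::low_precision::NetworkHelper;

// Returns the sorted common subset of two precision lists, e.g. {u8, i8} and {i8, f16} -> {i8}.
// getPrecisionsOnActivations() uses this call to narrow the precisions supported by every
// transformation registered for a single operation type.
std::vector<Type> narrowPrecisions(const std::vector<Type>& current, const std::vector<Type>& candidate) {
    return NetworkHelper::precisionIntersection(current, candidate);
}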
@@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() { make_matcher_type_relaxed(this); make_matcher_type_relaxed(this); make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); make_matcher_type_relaxed(this); make_matcher_type_relaxed(this); make_matcher_type_relaxed(this); @@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { network->validate_nodes_and_infer_types(); } -std::vector LowPrecisionTransformer::precisionIntersection( - const std::vector& v1, - const std::vector& v2) const noexcept { - std::vector v3; - - auto v1Copy = v1; - auto v2Copy = v2; - - std::sort(v1Copy.begin(), v1Copy.end()); - std::sort(v2Copy.begin(), v2Copy.end()); - - std::set_intersection(v1Copy.begin(), v1Copy.end(), - v2Copy.begin(), v2Copy.end(), - std::back_inserter(v3)); - return v3; -} - std::vector LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept { const std::string operantionType = LowPrecisionTransformations::getType(op); const std::vector transformation = transformations.find(operantionType); @@ -456,7 +442,7 @@ std::vector LowPrecisionTransformer::getPrecisionsOnActivations(c std::vector precisions = transformation[0]->getPrecisionsOnActivations(); for (const auto& transform : transformation) { - precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations()); + precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations()); } return precisions; } diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index b3651cdf231b09..726fc893975594 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -26,7 +26,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma return false; } - if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) { + if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) { return false; } @@ -46,24 +46,10 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma return false; } if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); - if (as_type_ptr(resultConstant)) { - replace_node(fqOnWeights, resultConstant); - } return false; } } else { if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) { - const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true); - if (resultDequantization.empty() && reshapeFromWeights) { - const auto foldedReshape = fold( - reshapeFromWeights->get_input_node_shared_ptr(0), - reshapeFromWeights->get_input_node_shared_ptr(1), - reshapeFromWeights->get_special_zero()); - if (is_type(foldedReshape)) { - replace_node(reshapeFromWeights, foldedReshape); - } - } return false; } } @@ -170,9 +156,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext return false; } - if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones - (shape_size(constOutputShape) != constOutputShape[0]) || - ((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) { + const size_t 
outChannelsShapeIndex = is_type(layer) ? 1ul : 0ul; + if ( // Check if all dimensions of scale except the output channels are all ones + (shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) || + ((constOutputShape[outChannelsShapeIndex] != 1ul) && + (fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) { return false; } } else { @@ -256,7 +244,7 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr l return false; } -void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr node) const { +void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& node, const size_t outChannelsShapeIndex) const { const auto fq = getFakeQuantizeOnWeights(node); if (fq == nullptr) { return; @@ -270,7 +258,9 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::sha dataPrecision.min, dataPrecision.max, dataPrecision.hasZeroPoint, - updatePrecisions); + updatePrecisions, + element::f32, + outChannelsShapeIndex); std::shared_ptr fqOnWeights = std::get<0>(tuple); if (as_type_ptr(fqOnWeights) == nullptr) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp index bdc6795b13731f..c7278944df0cba 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp @@ -9,7 +9,7 @@ using namespace MKLDNNPlugin; impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) { impl_desc_type res = impl_desc_type::unknown; -#define REPLACE_WORD(_wrd, _sub) int pos = impl_desc_name.find(#_wrd); \ +#define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \ if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); REPLACE_WORD(simple, ref); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 32efc8d09ac43a..3ab7622ac91d24 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -328,7 +329,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { .add( LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true)) .addStandaloneCleanup( - LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))); + LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })) + .remove()); transformer.transform(nGraphFunc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp index 2ed69db46b1892..8940527713cd36 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp @@ -159,7 +159,7 @@ class CumSumImpl: public ExtLayerBase { for (size_t iwork = start; iwork < end; ++iwork) { std::vector forStartOffset(numOfDims); forStartOffset[axis] = 0; - for (int64_t offsetIdx = 0, countersIdx = 0; offsetIdx < numOfDims; ++offsetIdx) { + for (size_t offsetIdx = 0, countersIdx = 0; offsetIdx < numOfDims; ++offsetIdx) { if (offsetIdx == axis) { continue; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index 0a5ad38507bda5..b12bed6a47672b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -10,6 +10,7 @@ #include #include #include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" #include #include @@ -841,7 +842,7 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptrget_input_size(); i++) { size_t count_not_unit_axis = 0; - auto shape = fq->get_input_shape(i); + auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), fq->get_input_shape(0).size()); if (ngraph::shape_size(shape) != 1) { size_t not_unit_axis = 0; @@ -885,9 +886,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptrget_output_size() != 1) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size(); - auto initAxisIdx = [&](size_t edgeIdx) { - const auto &inputDims = fq->get_input_shape(edgeIdx); - + auto initAxisIdx = [&](const ngraph::Shape& inputDims) { size_t axisIdx = 0; for (int i = 1; i < inputDims.size(); i++) { if (inputDims[i] > 1) { @@ -898,35 +897,36 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptrget_input_shape(0).size() == 1 ? 0 : 1; + const size_t dataNDims = fq->get_input_shape(0).size(); + axis = dataNDims == 1 ? 0 : 1; int axisSize = -1; - auto inputLowAxis = initAxisIdx(1); - const auto ilShape = fq->get_input_shape(1); + const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataNDims); + auto inputLowAxis = initAxisIdx(ilShape); isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1); if (!isInputLowBroadcasted) { axis = inputLowAxis; axisSize = ilShape[inputLowAxis]; } - auto inputHighAxis = initAxisIdx(2); - const auto ihShape = fq->get_input_shape(2); + const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataNDims); + auto inputHighAxis = initAxisIdx(ihShape); isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1); if (!isInputHighBroadcasted) { axis = inputHighAxis; axisSize = ihShape[inputHighAxis]; } - auto outputLowAxis = initAxisIdx(3); - const auto olShape = fq->get_input_shape(3); + const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataNDims); + auto outputLowAxis = initAxisIdx(olShape); isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1); if (!isOutputLowBroadcasted) { axis = outputLowAxis; axisSize = olShape[outputLowAxis]; } - auto outputHighAxis = initAxisIdx(4); - const auto ohShape = fq->get_input_shape(4); + const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataNDims); + auto outputHighAxis = initAxisIdx(ohShape); isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1); if (!isOutputHighBroadcasted) { axis = outputHighAxis; diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index eec2491a28edbf..35f4a575c15c74 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -5,10 +5,16 @@ #include #include "moc_transformations.hpp" +#include "pruning.hpp" +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); -bool 
ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr) { +bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager m(get_pass_config()); + m.register_pass(); + m.run_passes(f); + return false; } \ No newline at end of file diff --git a/inference-engine/src/snippets/CMakeLists.txt b/inference-engine/src/snippets/CMakeLists.txt index 482f7e52bec362..45e4b9bb352fa2 100644 --- a/inference-engine/src/snippets/CMakeLists.txt +++ b/inference-engine/src/snippets/CMakeLists.txt @@ -52,7 +52,8 @@ ie_developer_export_targets(${TARGET_NAME}) # install -install(TARGETS ${TARGET_NAME} - RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core - ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core - LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) +# TODO: uncomment once snippets are integrated into CPU plugin +# install(TARGETS ${TARGET_NAME} +# RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core +# ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core +# LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) diff --git a/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp index 96422f61e3b6c8..3aa4a6492d123f 100644 --- a/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp +++ b/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp @@ -29,6 +29,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op { const Strides& dilations, const CoordinateDiff& pads_begin, const CoordinateDiff& pads_end, + const element::Type output_type, const size_t& group = 1, const PadType& auto_pad = PadType::EXPLICIT, const CoordinateDiff& output_padding = {}, @@ -41,6 +42,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op { const Strides& dilations, const CoordinateDiff& pads_begin, const CoordinateDiff& pads_end, + const element::Type output_type, const size_t& group = 1, const PadType& auto_pad = PadType::EXPLICIT, const CoordinateDiff& output_padding = {}, @@ -79,6 +81,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op { size_t m_group; CoordinateDiff m_output_padding; std::shared_ptr m_output_shape; + element::Type m_output_type; }; } // namespace op diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp new file mode 100644 index 00000000000000..ef5983c1ee6cd3 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + + class TRANSFORMATIONS_API ConvertPadded2ValidConv; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief ConvertPadded2ValidConv transformation breaks down 2d conv into set of 1d conv. 
+ */ +class ngraph::pass::ConvertPadded2ValidConv : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp new file mode 100644 index 00000000000000..68281a94b75b82 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API EinsumDecomposition; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief EinsumDecomposition transformation decomposes Einsum-7 operation into a sub-graph with more simple operations: + * Transpose, Reshape, MatMul, ReduceSum, Unsqueeze, ShapeOf, ReduceProd, StridedSlice, and Concat + */ +class ngraph::pass::EinsumDecomposition : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EinsumDecomposition(); +}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp index ef9bc90bd1ea8c..e8940700dbeece 100644 --- a/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp +++ b/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp @@ -13,6 +13,7 @@ #include "ngraph/util.hpp" #include "ngraph/validation_util.hpp" #include "ngraph/opsets/opset1.hpp" +#include "ngraph_ops/type_relaxed.hpp" using namespace std; using namespace ngraph; @@ -25,6 +26,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output& data, const Strides& dilations, const CoordinateDiff& pads_begin, const CoordinateDiff& pads_end, + const element::Type output_type, const size_t& group, const PadType& auto_pad, const CoordinateDiff& output_padding, @@ -37,7 +39,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output& data, , m_auto_pad(auto_pad) , m_group(group) , m_output_padding(output_padding) - , m_output_shape(output_shape) { + , m_output_shape(output_shape) + , m_output_type(output_type) { constructor_validate_and_infer_types(); } @@ -48,6 +51,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output& data, const Strides& dilations, const CoordinateDiff& pads_begin, const CoordinateDiff& pads_end, + const element::Type output_type, const size_t& group, const PadType& auto_pad, const CoordinateDiff& output_padding, @@ -60,7 +64,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output& data, , m_auto_pad(auto_pad) , m_group(group) , m_output_padding(output_padding) - , m_output_shape(output_shape) { + , m_output_shape(output_shape) + , m_output_type(output_type) { constructor_validate_and_infer_types(); } @@ -81,13 +86,32 @@ void op::DeconvolutionIE::validate_and_infer_types() { } Output conv; if (m_output_shape) { - conv = std::make_shared(input_value(0), weights, m_output_shape, - m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding); + conv = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(), + m_output_shape, + m_strides, + 
m_pads_begin, + m_pads_end, + m_dilations, + m_auto_pad, + m_output_padding); } else { - conv = std::make_shared(input_value(0), weights, - m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding); + conv = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(), + m_strides, + m_pads_begin, + m_pads_end, + m_dilations, + m_auto_pad, + m_output_padding); } - set_output_type(0, conv.get_element_type(), conv.get_partial_shape()); + set_output_type(0, m_output_type, conv.get_partial_shape()); } shared_ptr op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { @@ -99,6 +123,7 @@ shared_ptr op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output m_dilations, m_pads_begin, m_pads_end, + m_output_type, m_group, m_auto_pad, m_output_padding, @@ -111,6 +136,7 @@ shared_ptr op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output m_dilations, m_pads_begin, m_pads_end, + m_output_type, m_group, m_auto_pad, m_output_padding, diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index b8aaa7d09ef201..bd44380f6275d3 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -58,6 +58,7 @@ #include "transformations/op_conversions/convert_gelu.hpp" #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" #include "transformations/op_conversions/batch_norm_decomposition.hpp" +#include "transformations/op_conversions/einsum_decomposition.hpp" #include "transformations/op_conversions/gelu7_downgrade.hpp" #include "transformations/op_conversions/reduce_l1_decomposition.hpp" #include "transformations/op_conversions/reduce_l2_decomposition.hpp" @@ -146,6 +147,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); decomp->add_matcher(); decomp->add_matcher(); + decomp->add_matcher(); decomp->set_name("ngraph::pass::CommonDecompositions"); // CF is required after all decompositions diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp index 5b7965762a59c5..1f0fb32ae6be46 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp @@ -113,6 +113,7 @@ ngraph::pass::ConvertDeconvolution::ConvertDeconvolution() { deconv->get_dilations(), deconv->get_pads_begin(), deconv->get_pads_end(), + deconv->get_output_element_type(0), 1 /* groups */, deconv->get_auto_pad(), deconv->get_output_padding(), @@ -158,6 +159,7 @@ ngraph::pass::ConvertGroupDeconvolution::ConvertGroupDeconvolution() { gconv->get_dilations(), gconv->get_pads_begin(), gconv->get_pads_end(), + gconv->get_output_element_type(0), group, gconv->get_auto_pad(), gconv->get_output_padding(), diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp 
b/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp new file mode 100644 index 00000000000000..b5d1023a20d09e --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp @@ -0,0 +1,372 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_padded2valid_conv.hpp" + +#include + +#include +#include +#include +#include +#include +#include "itt.hpp" + +using namespace ngraph; +using namespace op; + +static bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) { + if (!transpose) + return false; + const Output& transpose_order = transpose->input_value(1); + auto transpose_order_dim = transpose_order.get_shape().size(); + + if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size()) + return false; + + auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr()); + if (!const_with_order_values) + return false; + + const int64_t* data = const_with_order_values->get_data_ptr(); + if (!data) + return false; + + for (size_t i = 0; i < order.size(); i++) { + if (order[i] != data[i]) + return false; + } + + return true; +} + +static std::shared_ptr FlatCrop(Output input, size_t offset, size_t size) { + auto shape = input.get_shape(); + if (shape.size() == 1) { + return std::make_shared( + input, // data + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { offset }), // begin slice index + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { offset + size }), // end slice index + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 1 }), // strides + std::vector{0}, // begin mask + std::vector{0}); // end mask + } else if (shape.size() == 2) { + return std::make_shared( + input, // data + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)0, offset }), // begin sice index + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)0, offset + size }), // end slice index + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)1, (size_t)1 }), // strides + std::vector{1, 0}, // begin mask + std::vector{1, 0}); // end mask + } + return nullptr; +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0); +bool ngraph::pass::ConvertPadded2ValidConv::run_on_function(std::shared_ptr f) { + // Traverse nGraph Function in topological order + bool is_graph_modfied = false; + for (auto& node : f->get_ordered_ops()) { + auto conv = std::dynamic_pointer_cast (node); + if (nullptr == conv || transformation_callback(conv)) { + continue; + } + + const Output& input = conv->input_value(0); + const Output& filters = conv->input_value(1); + auto output_shape = conv->get_output_shape(0); + auto padding_type = conv->get_auto_pad(); + + // we support only 2D conv batch 1 + if (input.get_shape().size() != 4 || + filters.get_shape().size() != 4 || + output_shape.size() != 4 || + conv->get_dilations().size() != 2 || + conv->get_strides().size() != 2 || + input.get_shape()[0] != 1) { + continue; + } + // we are looking for Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) + // so required network must be in NHWC order like in TF + // supported cases: + // - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) + // - Transpose(NHWC->NCHW) => conv => 
broadcasted add (BIAS) => Transpose(NCHW->NHWC) + // - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPooling => Transpose(NCHW->NHWC) (2d max pool case) + // - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => ActivationFunction => Transpose(NCHW->NHWC) + // - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPool => ActivationFunction => Transpose(NCHW->NHWC) + // - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS (output of MO --disable_nhwc_to_nchw option) + // - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS => AF (output of MO --disable_nhwc_to_nchw option) + auto leading_transpose = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + if (!leading_transpose || !TransposeOrderMatches(leading_transpose, { 0, 3, 1, 2 })) + continue; + + // check if convolution output port is connected with only one Op + auto output_0 = node->get_output_target_inputs(0); + if (output_0.size() != 1) + continue; + + auto filter_values = std::dynamic_pointer_cast(filters.get_node_shared_ptr()); + if (!filter_values) { + continue; + } + size_t input_channel_count = input.get_shape()[1]; + size_t input_height = input.get_shape()[2]; + size_t input_width = input.get_shape()[3]; + + size_t filter_count = filters.get_shape()[0]; + + size_t filter_height = filters.get_shape()[2]; + size_t filter_width = filters.get_shape()[3]; + + auto output_0_node = output_0.begin()->get_node()->shared_from_this(); + auto trailing_transpose = std::dynamic_pointer_cast(output_0_node); + auto conv_bias = std::dynamic_pointer_cast(output_0_node); + auto max_pool = std::dynamic_pointer_cast(output_0_node); + auto af = std::dynamic_pointer_cast(output_0_node); + std::shared_ptrlast_op_in_sequence_for_replacement = trailing_transpose; + + std::shared_ptr bias_const; + if (leading_transpose && trailing_transpose && conv) { + auto trailing_transpose_output_0 = trailing_transpose->get_output_target_inputs(0); + if (trailing_transpose_output_0.size() == 1) { + auto trailing_transpose_output_0_node = trailing_transpose_output_0.begin()->get_node()->shared_from_this(); + auto add_op = std::dynamic_pointer_cast(trailing_transpose_output_0_node); + max_pool = std::dynamic_pointer_cast(trailing_transpose_output_0_node); + af = std::dynamic_pointer_cast(trailing_transpose_output_0_node); + if (add_op) { + auto add_const = std::dynamic_pointer_cast(add_op->input_value(1).get_node_shared_ptr()); + if (add_const) { + auto bias_size = shape_size(add_const->get_shape()); + // the add maybe normal add not bias, than we just go further + if (bias_size == filter_count) { + conv_bias = add_op; + last_op_in_sequence_for_replacement = add_op; + + auto bias_output_0 = add_op->get_output_target_inputs(0); + if (bias_output_0.size() == 1) { + auto bias_output_0_node = bias_output_0.begin()->get_node()->shared_from_this(); + max_pool = std::dynamic_pointer_cast(bias_output_0_node); + af = std::dynamic_pointer_cast(bias_output_0_node); + } + } + } + } + } + } else if (!trailing_transpose && conv_bias) { + // the NCHW order + auto bias_output_0 = conv_bias->get_output_target_inputs(0); + if (bias_output_0.size() != 1) + continue; + + auto bias_output_0_node = bias_output_0.begin()->get_node()->shared_from_this(); + trailing_transpose = std::dynamic_pointer_cast(bias_output_0_node); + last_op_in_sequence_for_replacement = trailing_transpose; + max_pool = std::dynamic_pointer_cast(bias_output_0_node); + af = std::dynamic_pointer_cast(bias_output_0_node); + } + + if (max_pool) { + auto 
maxpool_output_0 = max_pool->get_output_target_inputs(0); + if (maxpool_output_0.size() != 1) + continue; + auto maxpool_output_0_node = maxpool_output_0.begin()->get_node()->shared_from_this(); + // disable_nhwc_to_nchw option case + if (!trailing_transpose) { + trailing_transpose = std::dynamic_pointer_cast(maxpool_output_0_node); + last_op_in_sequence_for_replacement = trailing_transpose; + } else { + last_op_in_sequence_for_replacement = max_pool; + } + af = std::dynamic_pointer_cast(maxpool_output_0_node); + } + + //and finally activation function + if (af) { + auto af_output_0 = af->get_output_target_inputs(0); + if (af_output_0.size() != 1) + continue; + auto af_output_0_node = af_output_0.begin()->get_node()->shared_from_this(); + if (!trailing_transpose) { + trailing_transpose = std::dynamic_pointer_cast(af_output_0_node); + last_op_in_sequence_for_replacement = trailing_transpose; + } else { + last_op_in_sequence_for_replacement = af; + } + } + + if (!last_op_in_sequence_for_replacement || !trailing_transpose || !TransposeOrderMatches(trailing_transpose, { 0, 2, 3, 1 })) + continue; + + size_t filter_dilation_x = conv->get_dilations()[1]; + size_t filter_dilation_y = conv->get_dilations()[0]; + + size_t filter_stride_x = conv->get_strides()[1]; + size_t filter_stride_y = conv->get_strides()[0]; + + // we are assuming VALID conv + size_t pads_begin_x = 0; + size_t pads_begin_y = 0; + size_t pads_end_x = 0; + size_t pads_end_y = 0; + + size_t output_channel_count = filter_count; + size_t output_height = 0; + size_t output_width = 0; + + switch (padding_type) { + case ngraph::op::PadType::EXPLICIT: + pads_begin_y = conv->get_pads_begin()[0]; + pads_begin_x = conv->get_pads_begin()[1]; + pads_end_y = conv->get_pads_end()[0]; + pads_end_x = conv->get_pads_end()[1]; + break; + case ngraph::op::PadType::VALID: + // all padding equal to 0 - already set + break; + case ngraph::op::PadType::SAME_LOWER: + case ngraph::op::PadType::SAME_UPPER: + { + output_height = output_shape[2]; + output_width = output_shape[3]; + + size_t pad_begin_n_end_y = output_height * filter_stride_y + (filter_height)*filter_dilation_y - input_height - 1; + size_t pad_begin_n_end_x = output_width * filter_stride_x + (filter_width)*filter_dilation_x - input_width - 1; + pads_begin_y = (ngraph::op::PadType::SAME_LOWER == padding_type) ? (pad_begin_n_end_y >> 1) + (pad_begin_n_end_y & 1) : (pad_begin_n_end_y >> 1); + pads_end_y = (ngraph::op::PadType::SAME_UPPER == padding_type) ? (pad_begin_n_end_y >> 1) + (pad_begin_n_end_y & 1) : (pad_begin_n_end_y >> 1); + pads_begin_x = (ngraph::op::PadType::SAME_LOWER == padding_type) ? (pad_begin_n_end_x >> 1) + (pad_begin_n_end_x & 1) : (pad_begin_n_end_x >> 1); + pads_end_x = (ngraph::op::PadType::SAME_UPPER == padding_type) ? 
(pad_begin_n_end_x >> 1) + (pad_begin_n_end_x & 1) : (pad_begin_n_end_x >> 1); + + break; + } + default: + break; + } + output_height = (input_height + pads_begin_y + pads_end_y - ((filter_height - 1) * filter_dilation_y + 1)) / filter_stride_y + 1; + output_width = (input_width + pads_begin_x + pads_end_x - ((filter_width - 1) * filter_dilation_x + 1)) / filter_stride_x + 1; + + if (output_channel_count != output_shape[1] || + output_height != output_shape[2] || + output_width != output_shape[3]) { + continue; + } + + // No padding - there is no need to decompose such convolution + if (pads_begin_y == 0 && pads_end_y == 0 && pads_begin_x == 0 && pads_end_x == 0) + continue; + + // All checks applied - now we may start to do transformations + + size_t flat_left_padding = input_channel_count * pads_begin_x; + size_t flat_right_padding = input_channel_count * pads_end_x; + size_t flat_top_padding = input_channel_count * (pads_begin_x + input_width + pads_end_x) * pads_begin_y; + size_t flat_bottom_padding = input_channel_count * (pads_begin_x + input_width + pads_end_x) * pads_end_y; + size_t biggest_padding = std::max(std::max(flat_left_padding, flat_right_padding), std::max(flat_top_padding, flat_bottom_padding)); + size_t padded_row_size = input_channel_count * (pads_begin_x + input_width + pads_end_x); + + if (input_height > 1 && (flat_top_padding > 1 || flat_bottom_padding > 1)) { + biggest_padding = biggest_padding > padded_row_size ? biggest_padding : padded_row_size; + } + + auto flat_input = builder::opset1::reshape( + leading_transpose->input_value(0), + Shape{ (size_t)1, shape_size(leading_transpose->input_value(0).get_shape()) }); + // zero padding + auto const_holding_padding = std::make_shared(element::Type_t::f32, Shape{ 1, biggest_padding }, 0); + + // padding + // padding + // ... row ... + // ... row ... + // ........... + // ... row ... + // padding + // padding + + // Add top padding + OutputVector input_rows_to_concat; + + // padding + for (size_t p = 0; p < pads_begin_y; p++) { + if (padded_row_size == biggest_padding) { + input_rows_to_concat.push_back(const_holding_padding); + } else { + auto slice = FlatCrop(const_holding_padding, 0, padded_row_size); + ngraph::copy_runtime_info(conv, slice); + input_rows_to_concat.push_back(slice); + } + } + + // pad every row of input plan + for (size_t h = 0; h < input_height; h++) { + // left padding input right padding + // | | | + // +--------------+-----------+ + // | + // concat + + auto not_padded_row = input_height == 1 ? 
+ flat_input : + FlatCrop(flat_input, h * input_width * input_channel_count, input_width * input_channel_count); + ngraph::copy_runtime_info(conv, not_padded_row); + if (flat_left_padding || flat_right_padding) { + OutputVector single_row_concat_inputs; + if (flat_left_padding) { + if (flat_left_padding == biggest_padding) { + single_row_concat_inputs.push_back(const_holding_padding); + } else { + auto slice = FlatCrop(const_holding_padding, 0, flat_left_padding); + ngraph::copy_runtime_info(conv, slice); + single_row_concat_inputs.push_back(slice); + } + } + single_row_concat_inputs.push_back(not_padded_row); + if (flat_right_padding) { + if (flat_right_padding == biggest_padding) { + single_row_concat_inputs.push_back(const_holding_padding); + } else { + auto slice = FlatCrop(const_holding_padding, 0, flat_right_padding); + ngraph::copy_runtime_info(conv, slice); + single_row_concat_inputs.push_back(slice); + } + } + auto padded_row_concat = std::make_shared(single_row_concat_inputs, 1); + ngraph::copy_runtime_info(conv, padded_row_concat); + input_rows_to_concat.push_back(padded_row_concat); + } else { + input_rows_to_concat.push_back(not_padded_row); + } + } + // Bottom padding + for (size_t p = 0; p < pads_end_y; p++) { + if (padded_row_size == biggest_padding) { + input_rows_to_concat.push_back(const_holding_padding); + } else { + auto slice = FlatCrop(const_holding_padding, 0, padded_row_size); + ngraph::copy_runtime_info(conv, slice); + input_rows_to_concat.push_back(slice); + } + } + auto padded_input_plane = std::make_shared(input_rows_to_concat, 1); + ngraph::copy_runtime_info(conv, padded_input_plane); + + auto padded_input_plane_reshaped = builder::opset1::reshape(padded_input_plane, + Shape{ 1, pads_begin_y + input_height + pads_end_y, pads_begin_x + input_width + pads_end_x, input_channel_count }); + //NHWC => NCHW + auto transposed2chw = builder::opset1::reorder_axes(padded_input_plane_reshaped, { 0, 3, 1, 2 }); + + auto conv_copy = std::make_shared( + transposed2chw->output(0), + conv->input_value(1), + conv->get_strides(), + CoordinateDiff{ 0, 0 }, + CoordinateDiff{ 0, 0 }, + conv->get_dilations(), + PadType::EXPLICIT); + + ngraph::replace_node(conv, conv_copy); + + is_graph_modfied = true; + } + return is_graph_modfied; +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp index 7080688b09c409..b5507ee205f4a7 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp @@ -38,11 +38,14 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() { const std::shared_ptr child = subChildren.begin()->get_node()->shared_from_this(); if (child != nullptr) { if (is_type(child) || + is_type(child) || is_type(child) || + is_type(child) || is_type(child) || - (is_type(child) && + (is_type(child) && (child->output(0).get_target_inputs().size() == 1ul) && - is_type(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()))) { + (is_type(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) || + is_type(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) { const auto input1Type = sub->input(0).get_element_type(); const auto input2Type = sub->input(1).get_element_type(); if (((input1Type == element::u8) && (input2Type == 
element::u8)) || diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp new file mode 100644 index 00000000000000..e715d76c0363e2 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp @@ -0,0 +1,683 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/einsum_decomposition.hpp" + +#include +#include +#include +#include +#include +#include + +#include "itt.hpp" + +namespace { +/// \brief Check if the EinsumDecomposition transformation is applicable to a given Einsum. +/// The transformation is applicable if input subscript does not have repeated labels and ellipsis. +/// +/// \param subscript A subscript to check its format +/// +/// \return true - applicable, false - not applicable +/// +bool is_subscript_applicable(const std::string& subscript) { + auto labels = ngraph::opset7::Einsum::extract_labels(subscript); + auto unique_labels = std::unordered_set(labels.begin(), labels.end()); + return std::find(labels.begin(), labels.end(), "...") == labels.end() && unique_labels.size() == labels.size(); +} + +/// \brief Compute einsum_path for a given Einsum node meaning that the (pseudo-)optimal +/// order of operands contraction in terms of performance and memory consumption +/// +/// \param einsum_node An input Einsum node +/// +/// \return a vector of pairs with input indices assuming that the intermediate result is +/// appended in the tail +/// +std::vector> compute_einsum_path(std::shared_ptr einsum_node) { + // TODO: implement algorithm for finding (pseudo-)optimal einsum_path + std::vector> einsum_path; + const size_t num_inputs = einsum_node->get_input_size(); + NGRAPH_CHECK(num_inputs > 0); + for (size_t input_ind = num_inputs - 1; input_ind > 0; --input_ind) { + einsum_path.push_back(std::make_pair(0, input_ind)); + } + return einsum_path; +} + +/// \brief Check if the dimension with a given label is reduced. 
The dimension is reduced +/// if the corresponding label is met in neither the output subscript nor the input subscripts +/// excluding ones specified by a vector excluded_indices +/// +/// \param input_subscripts The vector of the input subscripts +/// \param output_subscript The output subscript +/// \param label_to_check A label that corresponds to dimension to check +/// \param excluded_indices A vector of input subscript indices to be excluded +/// +/// \return true - a dimension to reduce, false - otherwise +/// +bool is_dimension_reduced(const std::vector& input_subscripts, const std::string& output_subscript, + const std::string label_to_check, const std::vector& excluded_indices) { + for (size_t input_ind = 0; input_ind < input_subscripts.size(); ++input_ind) { + const auto& input_subscript = input_subscripts[input_ind]; + // the subscript is checked only if its index is not in excluded indices list + bool check_subscript = (std::find(excluded_indices.begin(), excluded_indices.end(), input_ind) == excluded_indices.end()); + if (check_subscript && input_subscript.find(label_to_check) != std::string::npos) { + return false; + } + } + return output_subscript.find(label_to_check) == std::string::npos; +} + +/// \brief Checks if input vector represents a range [0; n] +/// +/// \param labels_inds Input vector to check +/// +/// \return true - the input vector is a range [0; n]; false - otherwise +/// +bool is_range_0_to_n(const std::vector &labels_inds) { + int64_t check_index = 0; + for (auto index : labels_inds) { + if (check_index != index) { + return false; + } + ++check_index; + } + return true; +} + +/// \brief Generate an input subscript that provides to group dimensions into the common, +/// separate and reduced dimensions after transpose +/// +/// \param input_subscripts A vector of the input subscripts +/// \param common_labels_inds A vector of indices of the common dimensions +/// \param separate_labels_inds A vector of indices of the separate dimensions +/// \param reduced_labels_inds A vector of indices of the reduced dimensions +/// \param is_separate_first A boolean flag. 
It is true if the separate dimensions +/// goes before the reduced dimensions +/// +/// \return An input subscript for grouping dimensions +/// +std::string generate_grouping_subscript(const std::string& input_subscript, const std::vector& common_labels_inds, + const std::vector& separate_labels_inds, const std::vector& reduced_labels_inds, + bool& is_separate_first) { + // transpose is not needed if common labels, reduced labels + // and separate labels indices go concurrently + std::vector labels_inds = common_labels_inds; + labels_inds.insert(labels_inds.end(), reduced_labels_inds.begin(), reduced_labels_inds.end()); + labels_inds.insert(labels_inds.end(), separate_labels_inds.begin(), separate_labels_inds.end()); + if (is_range_0_to_n(labels_inds)) { + is_separate_first = false; + return input_subscript; + } + + // transpose is not needed if common labels, separate labels + // and reduced labels indices go concurrently + labels_inds = common_labels_inds; + labels_inds.insert(labels_inds.end(), separate_labels_inds.begin(), separate_labels_inds.end()); + labels_inds.insert(labels_inds.end(), reduced_labels_inds.begin(), reduced_labels_inds.end()); + if (is_range_0_to_n(labels_inds)) { + is_separate_first = true; + return input_subscript; + } + + auto labels = ngraph::opset7::Einsum::extract_labels(input_subscript); + std::string required_subscript = ""; + for (auto index : labels_inds) { + required_subscript += labels[index]; + } + is_separate_first = true; + return required_subscript; +} + +/// \brief Update a vector of input nodes and subscripts by removing items for operands +/// with indices input_ind1 and input_ind2 and inserted new input node and the corresponsing +/// subscript in the tail +/// +/// \param input_nodes A vector of the input nodes to update +/// \param input_subscripts A vector of the input subscripts to update +/// \param input_ind1 An index of item to be removed +/// \param input_ind2 An index of item to be removed +/// \param new_node New input node to be inserted in the tail +/// \param new_subscript New input subscript to be inserted in the tail +/// +void update_operands(ngraph::OutputVector& input_nodes, std::vector& input_subscripts, size_t input_ind1, size_t input_ind2, + const ngraph::Output& new_node, const std::string& new_subscript) { + NGRAPH_CHECK(input_ind1 < input_ind2); + NGRAPH_CHECK(input_ind2 < input_nodes.size()); + NGRAPH_CHECK(input_ind2 < input_subscripts.size()); + input_nodes.erase(input_nodes.begin() + input_ind2); + input_nodes.erase(input_nodes.begin() + input_ind1); + input_nodes.push_back(new_node); + input_subscripts.erase(input_subscripts.begin() + input_ind2); + input_subscripts.erase(input_subscripts.begin() + input_ind1); + input_subscripts.push_back(new_subscript); +} + +/// \brief Return input node with computed sub-shape defined by a range [s_begin;s_end) +/// +/// \param data_shape Input node that contains some tensor shape +/// \param s_begin Start index of dimension +/// \param s_end End index of dimension +/// \param subgraph_nodes A vector of operation nodes where to add new ones +/// \param is_product A boolean flag that indicates if to compute a product of +/// dimension sizes in the computed sub-shape +/// +/// \return A vector of input nodes that can be empty (if s_end <= s_begin) +/// or contains just one input node with sub-shape or its product +/// +ngraph::OutputVector compute_sub_shape(const ngraph::Output& data_shape, size_t s_begin, size_t s_end, ngraph::NodeVector& subgraph_nodes, + bool is_product = false) { 
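    // Editorial note (illustrative, not part of the patch): given a 1-D shape tensor produced
    // by ShapeOf, e.g. [2, 3, 4, 5], compute_sub_shape(shape, 1, 3, nodes) slices out the
    // sub-shape [3, 4] via StridedSlice; with is_product = true the slice is then reduced with
    // ReduceProd over axis 0 (keep_dims), so the returned OutputVector holds the one-element
    // tensor [12]. When s_end <= s_begin an empty OutputVector is returned. The names "shape"
    // and "nodes" above are placeholders for the actual arguments.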
+ int64_t begin = static_cast(s_begin); + int64_t end = static_cast(s_end); + ngraph::OutputVector sub_shape_vector; + if (end <= begin) { + return sub_shape_vector; + } + std::vector begin_mask(1, 0); + std::vector end_mask(1, 0); + auto begin_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {begin}); + auto end_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {end}); + auto stride_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {1}); + auto sub_shape = std::make_shared(data_shape, begin_const, end_const, begin_mask, end_mask); + + if (is_product) { + auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0}); + auto separate_shape_prod = std::make_shared(sub_shape->output(0), reduce_axis_const, true); + sub_shape_vector.push_back(separate_shape_prod->output(0)); + subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod}); + } else { + sub_shape_vector.push_back(sub_shape->output(0)); + } + subgraph_nodes.insert(subgraph_nodes.end(), {begin_const, end_const, stride_const, sub_shape}); + return sub_shape_vector; +} + +/// \brief Unsqueeze input node by given dimensions if a vector of unsqueezing dimensions +/// is not empty +/// +/// \param input_node Input node to unsqueeze +/// \param unsqueeze_axes A vector of dimensions to be unsqueezed +/// \param subgraph_nodes A vector of operation nodes that is included into a +/// sub-graph decomposing Einsum that is needed for copy_runtime_info +/// +/// \return Unsqueezed input node if a vector of unsqueezing dimensions is not empty, +/// otherwise, the original input node +/// +ngraph::Output unsqueeze_input(const ngraph::Output& input_node, const std::vector& unsqueeze_axes, + ngraph::NodeVector& subgraph_nodes) { + if (unsqueeze_axes.empty()) { + return input_node; + } + auto unsqueeze_axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {unsqueeze_axes.size()}, unsqueeze_axes); + auto unsqueeze = std::make_shared(input_node, unsqueeze_axes_const); + subgraph_nodes.insert(subgraph_nodes.end(), {unsqueeze_axes_const, unsqueeze}); + return unsqueeze->output(0); +} + +/// \brief Reshape input node to the new shape specified by sub-shapes of the common, +/// separate and reduced dimensions so that the reshaped input has a format acceptable by MatMul +/// +/// \param input_node Input node to reshape +/// \param common_sub_shape A sub-shape corresponding to the common dimensions +/// \param separate_sub_shape A sub-shape corresponding to the separate dimensions +/// \param reduced_sub_shape_prod A product of the separate dimensions sizes +/// \param is_separate_first true - the separate dimensions placed before reduced +/// dimensions, otherwise, it is after them +/// \param subgraph_nodes A vector of operation nodes that is included into +/// a sub-graph decomposing Einsum that is needed for copy_runtime_info +/// +/// \return Reshaped input node +/// +ngraph::Output reshape_input_for_matmul(const ngraph::Output& input_node, const ngraph::OutputVector& common_sub_shape, + const ngraph::OutputVector& separate_sub_shape, const ngraph::OutputVector& reduced_sub_shape_prod, + bool is_separate_first, ngraph::NodeVector& subgraph_nodes) { + ngraph::OutputVector new_shape_parts; + new_shape_parts.insert(new_shape_parts.end(), common_sub_shape.begin(), common_sub_shape.end()); + + // compute a product of a sub-shape 
for separate labels + ngraph::OutputVector separate_parts; + if (common_sub_shape.size() > 0 && separate_sub_shape.size() == 0) { + // in this case new dimension corresponding to separate labels must be added + // since MatMul operation is not possible to do without separate dimensions if the + // common dimension presents + auto separate_new_dim = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {1}); + separate_parts.push_back(separate_new_dim); + subgraph_nodes.insert(subgraph_nodes.end(), {separate_new_dim}); + } else if (separate_sub_shape.size() > 0) { + // in this case compute a product of separate dimension sizes since they must be + // presented with just one dimension for MatMul + auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0}); + auto separate_shape_prod = std::make_shared(separate_sub_shape[0], reduce_axis_const, true); + separate_parts.push_back(separate_shape_prod->output(0)); + subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod}); + } + + // form a new shape for input so that collapsed dimensions corresponding + // to the common, separate and reduced dimensions are placed in the correct order + if (is_separate_first) { + new_shape_parts.insert(new_shape_parts.end(), separate_parts.begin(), separate_parts.end()); + new_shape_parts.insert(new_shape_parts.end(), reduced_sub_shape_prod.begin(), reduced_sub_shape_prod.end()); + } else { + new_shape_parts.insert(new_shape_parts.end(), reduced_sub_shape_prod.begin(), reduced_sub_shape_prod.end()); + new_shape_parts.insert(new_shape_parts.end(), separate_parts.begin(), separate_parts.end()); + } + + // in case of scalar reshape is not needed + if (new_shape_parts.size() == 0) { + return input_node; + } + + auto new_shape_op = std::make_shared(new_shape_parts, 0); + + // if new shape is possible to compute on the shape infer stage, insert Constant node immediatelly + // in order to prevent repeated computing during constant-folding pass + std::shared_ptr reshaped_input_op; + if (auto new_shape_const = ngraph::get_constant_from_source(new_shape_op)) { + reshaped_input_op = std::make_shared(input_node, new_shape_const, false); + subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_const}); + } else { + reshaped_input_op = std::make_shared(input_node, new_shape_op->output(0), false); + subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_op}); + } + + subgraph_nodes.insert(subgraph_nodes.end(), {reshaped_input_op}); + return reshaped_input_op->output(0); +} + +/// \brief Transpose one of the Einsum inputs to layout specified through the required +/// subscript +/// +/// \param input_nodes A vector of input nodes to Einsum +/// \param input_subscripts A vector of corresponding subscripts for input nodes +/// \param required_subscript The required subscript that defines layout to which the +/// input is to transpose +/// \param input_ind An index of the input node to be transposed +/// \param subgraph_nodes A vector of operation nodes that is included into +/// a sub-graph decomposing Einsum that is needed for copy_runtime_info +/// +void transpose_input(ngraph::OutputVector& input_nodes, std::vector& input_subscripts, const std::string& required_subscript, size_t input_ind, + ngraph::NodeVector& subgraph_nodes) { + // perform sanity check for arguments + auto num_inputs = input_nodes.size(); + NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + 
NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range."); + + // generate permutation vector by searching for bijection between input_subscripts + // and required_subscript + std::vector permutation; + const auto& input_subscript = input_subscripts[input_ind]; + + // transpose is not needed since the input subscript is not going to be changed + if (required_subscript == input_subscript) { + return; + } + + // find permutation that establishes bijection between the input subscript + // and the required one + auto labels = ngraph::opset7::Einsum::extract_labels(input_subscript); + auto required_labels = ngraph::opset7::Einsum::extract_labels(required_subscript); + NGRAPH_CHECK(labels.size() == required_labels.size()); + for (const auto& required_label : required_labels) { + auto it = std::find(labels.begin(), labels.end(), required_label); + NGRAPH_CHECK(it != labels.end()); + int64_t found_index = static_cast(it - labels.begin()); + permutation.push_back(found_index); + } + + // create a sub-graph for transposing into the required layout + const auto& input_node = input_nodes[input_ind]; + auto permutation_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {permutation.size()}, permutation); + auto transpose = std::make_shared(input_node, permutation_const); + + // update a vector of inputs and input subscripts + input_nodes[input_ind] = transpose->output(0); + input_subscripts[input_ind] = required_subscript; + + // update a vector of nodes for copy_runtime_info + subgraph_nodes.insert(subgraph_nodes.end(), {permutation_const, transpose}); +} + +/// \brief Find labels (in a given input subscript) that are met once in the equation +/// and reduce dimensions corresponding to such labels +/// +/// \param einsum_decompose_ptr A pointer to Einsum decomposing pass +/// \param input_nodes A vector of input nodes to Einsum operation +/// \param input_subscripts A vector of corresponding subscripts for the input nodes +/// \param output_subscript The output subscript +/// \param input_ind An index of the input node for which it will check +/// dimensions to be reduced +/// \param subgraph_nodes A vector of operation nodes that is included into +/// a sub-graph decomposing Einsum that is needed for copy_runtime_info +/// +void reduce_input(ngraph::pass::EinsumDecomposition *einsum_decompose_ptr, + ngraph::OutputVector& input_nodes, std::vector& input_subscripts, + const std::string& output_subscript, size_t input_ind, ngraph::NodeVector& subgraph_nodes) { + // perform sanity check for arguments + auto num_inputs = input_nodes.size(); + NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range."); + + std::vector reduced_axes; + auto labels = ngraph::opset7::Einsum::extract_labels(input_subscripts[input_ind]); + std::string new_input_subscript = ""; + for (size_t dim_ind = 0; dim_ind < labels.size(); ++dim_ind) { + const auto& label = labels[dim_ind]; + + // check if the current label is met in the other input subscripts + // or the output subscript + bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind}); + + // if label is not met, dimension corresponding to the label is to reduce + if (is_dim_reduced) { + reduced_axes.push_back(dim_ind); + } else { + new_input_subscript += label; + } + } + + if (reduced_axes.size() == 0) { + // there is no axis to reduce + return; + } + + // reduce by summed up elements along 
+    const auto& input_node = input_nodes[input_ind];
+    auto axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {reduced_axes.size()}, reduced_axes);
+    auto reduce_sum = einsum_decompose_ptr->register_new_node<ngraph::opset7::ReduceSum>(input_node, axes_const, false);
+
+    // update a vector of inputs and input subscripts
+    input_nodes[input_ind] = reduce_sum->output(0);
+    input_subscripts[input_ind] = new_input_subscript;
+
+    // update a vector of nodes for copy_runtime_info
+    subgraph_nodes.insert(subgraph_nodes.end(), {axes_const, reduce_sum});
+}
+
+/// \brief Contract two inputs of Einsum operation according to equation.
+/// The result of the contraction is appended into input_nodes along with its subscript.
+/// The input nodes for these two operands are removed from input_nodes along with their input
+/// subscripts
+///
+/// \param einsum_decompose_ptr A pointer to Einsum decomposing pass
+/// \param input_nodes A vector of input nodes to Einsum operation
+/// \param input_subscripts A vector of corresponding subscripts for the input nodes
+/// \param output_subscript The output subscript
+/// \param input_ind1 An index of the first operand
+/// \param input_ind2 An index of the second operand
+/// \param subgraph_nodes A vector of operation nodes that is included into a
+/// sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr,
+                         ngraph::OutputVector& input_nodes, std::vector<std::string>& input_subscripts,
+                         const std::string& output_subscript, size_t input_ind1,
+                         size_t input_ind2, ngraph::NodeVector& subgraph_nodes) {
+    // assume that input_ind1 < input_ind2 without loss of generality; otherwise, just swap them
+    if (input_ind2 < input_ind1) {
+        std::swap(input_ind1, input_ind2);
+    }
+
+    // perform sanity check for arguments
+    auto num_inputs = input_nodes.size();
+    NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript.");
+    NGRAPH_CHECK(input_ind2 < num_inputs && input_ind1 != input_ind2, "Incorrect input index is specified.");
+
+    const auto& input_node1 = input_nodes[input_ind1];
+    const auto& input_node2 = input_nodes[input_ind2];
+
+    // reduce dimensions for input operands if possible
+    reduce_input(einsum_decompose_ptr, input_nodes, input_subscripts, output_subscript, input_ind1, subgraph_nodes);
+    reduce_input(einsum_decompose_ptr, input_nodes, input_subscripts, output_subscript, input_ind2, subgraph_nodes);
+
+    // step 0. split dimensions of both operands into three groups:
+    // 1. dimension indices with the same labels (in both subscripts) that are NOT reduced -
+    //    common labels (dimensions)
+    // 2. dimension indices with labels that are met only in one of the two subscripts - separate
+    //    labels (dimensions)
+    // 3. dimension indices with the same labels (in both subscripts) that are reduced - reduced
+    //    labels (dimensions)
+    // NOTE: a dimension is reduced iff the corresponding label is met in neither the output
+    // subscript nor the input subscripts for other Einsum inputs, excluding the two given inputs
+    auto& input_subscript1 = input_subscripts[input_ind1];
+    auto labels1 = ngraph::opset7::Einsum::extract_labels(input_subscript1);
+    auto& input_subscript2 = input_subscripts[input_ind2];
+    auto labels2 = ngraph::opset7::Einsum::extract_labels(input_subscript2);
+    std::string common_part = "";
+    std::string separate_part1 = "";
+    std::string separate_part2 = "";
+    std::vector<int64_t> common_labels_inds1, common_labels_inds2;
+    std::vector<int64_t> separate_labels_inds1, separate_labels_inds2;
+    std::vector<int64_t> reduced_labels_inds1, reduced_labels_inds2;
+    for (size_t label_ind = 0; label_ind < labels1.size(); ++label_ind) {
+        const auto& label = labels1[label_ind];
+        auto iter = std::find(labels2.begin(), labels2.end(), label);
+        if (iter != labels2.end()) {
+            bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind1, input_ind2});
+            common_part += label;
+            if (is_dim_reduced) {
+                reduced_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+                reduced_labels_inds2.push_back(static_cast<int64_t>(iter - labels2.begin()));
+            } else {
+                common_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+                common_labels_inds2.push_back(static_cast<int64_t>(iter - labels2.begin()));
+            }
+        } else {
+            separate_part1 += label;
+            separate_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+        }
+    }
+    for (size_t label_ind = 0; label_ind < labels2.size(); ++label_ind) {
+        const auto& label = labels2[label_ind];
+        auto iter = std::find(labels1.begin(), labels1.end(), label);
+        if (iter == labels1.end()) {
+            separate_part2 += label;
+            separate_labels_inds2.push_back(static_cast<int64_t>(label_ind));
+        }
+    }
+
+    // if there is no common dimension to reduce, apply eltwise multiplication
+    if (reduced_labels_inds1.empty()) {
+        std::string convenient_subscript = common_part + separate_part2;
+        std::string resultant_subscript = input_subscript1 + separate_part2;
+
+        // transpose the second operand in order to get the convenient layout
+        // for further unsqueezing
+        transpose_input(input_nodes, input_subscripts, convenient_subscript, input_ind2, subgraph_nodes);
+
+        // unsqueeze the first operand with new dimensions in the tail;
+        // their number is equal to the number of separate labels in the second subscript
+        int64_t unsqueeze_dim = labels1.size();
+        std::vector<int64_t> unsqueeze_axis1;
+        for (size_t label_ind = 0; label_ind < separate_labels_inds2.size(); ++label_ind) {
+            unsqueeze_axis1.push_back(unsqueeze_dim++);
+        }
+        const auto& unsqueeze_axis2 = separate_labels_inds1;
+
+        // unsqueeze input operands for elementwise multiplication with broadcasting
+        auto unsqueeze_output1 = unsqueeze_input(input_node1, unsqueeze_axis1, subgraph_nodes);
+        auto unsqueeze_output2 = unsqueeze_input(input_node2, unsqueeze_axis2, subgraph_nodes);
+
+        // multiply both operands with broadcasting
+        auto mul = std::make_shared<ngraph::opset7::Multiply>(unsqueeze_output1, unsqueeze_output2, ngraph::op::AutoBroadcastSpec::NUMPY);
+
+        // update input operand and input subscript for Einsum operation
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, mul->output(0), resultant_subscript);
+
+        // update a vector of nodes for copy_runtime_info
+        subgraph_nodes.insert(subgraph_nodes.end(), {mul});
+        return;
+    }
+
+    // in this case the set of reduced labels is not empty, so the MatMul operation can be applied
+    // step 1. transpose both operands so that common labels, separate labels, and reduced labels
+    // are grouped for both operands
+    bool is_separate_first1 = false;
+    auto int_subscript1 = generate_grouping_subscript(input_subscript1, common_labels_inds1, separate_labels_inds1,
+                                                      reduced_labels_inds1, is_separate_first1);
+    transpose_input(input_nodes, input_subscripts, int_subscript1, input_ind1, subgraph_nodes);
+    bool is_separate_first2 = false;
+    auto int_subscript2 = generate_grouping_subscript(input_subscript2, common_labels_inds2, separate_labels_inds2,
+                                                      reduced_labels_inds2, is_separate_first2);
+    transpose_input(input_nodes, input_subscripts, int_subscript2, input_ind2, subgraph_nodes);
+
+    // step 2. reshape both operands so that separate labels and reduced labels are each represented
+    // with just one dimension; this is required by the MatMul operation with respect to the operand
+    // format. For example, the shape must be in a format [B1, ..., Bm, X1, Y] or [B1, ..., Bm,
+    // Y, X2], where B1, ..., Bm are common dimensions, X1 and X2 are collapsed dimensions
+    // for separate labels, and Y is the collapsed dimension for reduced labels.
+    // this step is not needed for an operand if it satisfies one of the requirements:
+    // 1. there is just one separate dimension and just one reduced dimension
+    // 2. there is no separate dimension, no common dimensions, and just one reduced dimension
+    bool no_reshape_for_matmul1 = (reduced_labels_inds1.size() == 1 && separate_labels_inds1.size() == 1) ||
+                                  (reduced_labels_inds1.size() == 1 && common_labels_inds1.size() == 0
+                                   && separate_labels_inds1.size() == 0);
+    bool no_reshape_for_matmul2 = (reduced_labels_inds2.size() == 1 && separate_labels_inds2.size() == 1) ||
+                                  (reduced_labels_inds2.size() == 1 && common_labels_inds2.size() == 0
+                                   && separate_labels_inds2.size() == 0);
+    // reshaping back after MatMul is not needed if one of the two requirements is satisfied for both operands:
+    // 1. there is just one separate dimension
+    // 2. there is no separate dimension and no common dimensions present.
+    // If there is no separate dimension but common dimensions are present, Reshape is needed
+    // because an auxiliary separate dimension has been added by the Unsqueeze operation
+    // for the purposes of MatMul
+    bool no_reshape_back1 = (separate_labels_inds1.size() == 1) ||
+                            (common_labels_inds1.size() == 0 && separate_labels_inds1.size() == 0);
+    bool no_reshape_back2 = (separate_labels_inds2.size() == 1) ||
+                            (common_labels_inds2.size() == 0 && separate_labels_inds2.size() == 0);
+    bool no_reshape_after_matmul = no_reshape_back1 && no_reshape_back2;
+
+    auto matmul_operand1 = input_node1;
+    auto matmul_operand2 = input_node2;
+    int64_t common_dims_begin = 0;
+    int64_t common_dims_end = common_labels_inds1.size();
+    ngraph::OutputVector common_sub_shape, separate1_sub_shape, separate2_sub_shape;
+    if (no_reshape_for_matmul1 == false || no_reshape_for_matmul2 == false) {
+        auto data_shape1 = std::make_shared<ngraph::opset7::ShapeOf>(input_node1);
+        common_sub_shape = compute_sub_shape(data_shape1, common_dims_begin, common_dims_end, subgraph_nodes);
+        int64_t reduced_dims_begin = (is_separate_first1 ? common_labels_inds1.size() + separate_labels_inds1.size() : common_labels_inds1.size());
+        int64_t reduced_dims_end = reduced_dims_begin + reduced_labels_inds1.size();
+        auto reduced_sub_shape_prod = compute_sub_shape(data_shape1, reduced_dims_begin, reduced_dims_end, subgraph_nodes, true);
+
+        if (no_reshape_for_matmul1 == false || no_reshape_after_matmul == false) {
+            int64_t separate1_dims_begin = (is_separate_first1 ? common_labels_inds1.size() : common_labels_inds1.size() + reduced_labels_inds1.size());
+            int64_t separate1_dims_end = separate1_dims_begin + separate_labels_inds1.size();
+            separate1_sub_shape = compute_sub_shape(data_shape1, separate1_dims_begin, separate1_dims_end, subgraph_nodes);
+            matmul_operand1 = reshape_input_for_matmul(input_node1, common_sub_shape, separate1_sub_shape,
+                                                       reduced_sub_shape_prod, is_separate_first1, subgraph_nodes);
+        }
+
+        if (no_reshape_for_matmul2 == false || no_reshape_after_matmul == false) {
+            auto data_shape2 = std::make_shared<ngraph::opset7::ShapeOf>(input_node2);
+            int64_t separate2_dims_begin = (is_separate_first2 ? common_labels_inds2.size() : common_labels_inds2.size() + reduced_labels_inds2.size());
+            int64_t separate2_dims_end = separate2_dims_begin + separate_labels_inds2.size();
+            separate2_sub_shape = compute_sub_shape(data_shape2, separate2_dims_begin, separate2_dims_end, subgraph_nodes);
+            matmul_operand2 = reshape_input_for_matmul(input_node2, common_sub_shape, separate2_sub_shape,
+                                                       reduced_sub_shape_prod, is_separate_first2, subgraph_nodes);
+            subgraph_nodes.insert(subgraph_nodes.end(), {data_shape2});
+        }
+        subgraph_nodes.insert(subgraph_nodes.end(), {data_shape1});
+    }
+
+    // step 3. apply MatMul operation for formatted inputs
+    bool transpose_a = (is_separate_first1 ? false : true);
+    bool transpose_b = (is_separate_first2 ? true : false);
+    auto matmul = std::make_shared<ngraph::opset7::MatMul>(matmul_operand1, matmul_operand2, transpose_a, transpose_b);
+
+    // step 4. reshape back by unrolling dimensions corresponding to separate labels if needed;
+    // now dimensions corresponding to reduced labels are reduced by the MatMul operation
+    std::string resultant_subscript = input_subscript1.substr(common_dims_begin, common_dims_end) + separate_part1 + separate_part2;
+    if (no_reshape_after_matmul) {
+        // this is the case when Reshape is not needed after the MatMul operation
+        // since there are no collapsed (or auxiliary added) separate dimensions
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, matmul->output(0), resultant_subscript);
+    } else {
+        ngraph::OutputVector new_shape;
+        new_shape.insert(new_shape.end(), common_sub_shape.begin(), common_sub_shape.end());
+        new_shape.insert(new_shape.end(), separate1_sub_shape.begin(), separate1_sub_shape.end());
+        new_shape.insert(new_shape.end(), separate2_sub_shape.begin(), separate2_sub_shape.end());
+        auto result_shape_op = std::make_shared<ngraph::opset7::Concat>(new_shape, 0);
+
+        // if the new shape can be computed at the shape inference stage, insert a Constant node immediately
+        // in order to prevent repeated computation during the constant-folding pass
+        std::shared_ptr<ngraph::Node> result_op;
+        if (auto new_shape_const = ngraph::get_constant_from_source(result_shape_op)) {
+            result_op = std::make_shared<ngraph::opset7::Reshape>(matmul->output(0), new_shape_const, false);
+            subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_const});
+        } else {
+            result_op = std::make_shared<ngraph::opset7::Reshape>(matmul->output(0), result_shape_op->output(0), false);
+            subgraph_nodes.insert(subgraph_nodes.end(), {result_shape_op});
+        }
+
+        // update input operand and input subscript for Einsum operation
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, result_op->output(0), resultant_subscript);
+        subgraph_nodes.insert(subgraph_nodes.end(), {result_op});
+    }
+
+    // update a vector of nodes for copy_runtime_info
+    subgraph_nodes.insert(subgraph_nodes.end(), {matmul});
+}
+}  // namespace
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::EinsumDecomposition, "EinsumDecomposition", 0);
+
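+// For example, the pass can be applied to an nGraph function containing an Einsum node roughly
+// as follows (a minimal usage sketch; the parameter shapes and the equation below are assumptions
+// made for illustration only and are not part of this transformation):
+//
+//   auto a = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3});
+//   auto b = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{3, 4});
+//   auto einsum = std::make_shared<ngraph::opset7::Einsum>(ngraph::OutputVector{a, b}, "ab,bc->ac");
+//   auto function = std::make_shared<ngraph::Function>(ngraph::OutputVector{einsum->output(0)},
+//                                                      ngraph::ParameterVector{a, b});
+//   ngraph::pass::Manager manager;
+//   manager.register_pass<ngraph::pass::EinsumDecomposition>();
+//   manager.run_passes(function);
+//   // the Einsum node is expected to be replaced by an equivalent MatMul-based sub-graph
+//   // producing the same {2, 4} result
+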
+ngraph::pass::EinsumDecomposition::EinsumDecomposition() {
+    // NOTE: The transformation is applicable if the Einsum equation does not contain an ellipsis label
+    // and does not contain subscripts with repeated labels.
+    // For example, the transformation is applicable to Einsum with equation="abc,bd->ad"
+    // but not applicable to a case with equation="aabc,bd->ad" due to repeated labels
+    // in the first input subscript.
+    MATCHER_SCOPE(EinsumDecomposition);
+    auto einsum = ngraph::pattern::wrap_type<ngraph::opset7::Einsum>();
+    ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
+        auto einsum_node = std::dynamic_pointer_cast<ngraph::opset7::Einsum>(m.get_match_root());
+        if (!einsum_node) {
+            return false;
+        }
+
+        auto equation = einsum_node->get_equation();
+        std::vector<std::string> input_subscripts;
+        std::string output_subscript;
+        ngraph::opset7::Einsum::parse_equation(equation, input_subscripts, output_subscript);
+
+        // check that the transformation is applicable
+        if (std::any_of(input_subscripts.cbegin(), input_subscripts.cend(), [](const std::string& subscript) {
+                return is_subscript_applicable(subscript) == false;
+            })) {
+            return false;
+        }
+
+        // create a list of input nodes, preserving their order,
+        // and a vector of sub-graph nodes for copy_runtime_info
+        ngraph::OutputVector input_nodes = einsum_node->input_values();
+        ngraph::NodeVector subgraph_nodes;
+
+        // compute the einsum path that is used to contract pairs of operands
+        // in a more optimal order
+        auto einsum_path = compute_einsum_path(einsum_node);
+
+        // contract inputs by Einsum until just one remains
+        for (auto const& inds_pair : einsum_path) {
+            contract_two_inputs(this, input_nodes, input_subscripts, output_subscript, inds_pair.first, inds_pair.second, subgraph_nodes);
+        }
+
+        // reduce dimensions for the remaining input node
+        NGRAPH_CHECK(input_nodes.size() == 1);
+        reduce_input(this, input_nodes, input_subscripts, output_subscript, 0, subgraph_nodes);
+
+        // transpose dimensions to the layout required by the output subscript
+        transpose_input(input_nodes, input_subscripts, output_subscript, 0, subgraph_nodes);
+
+        // replace the original Einsum node with the last node from the decomposing sub-graph
+        // and preserve the original node name
+        auto last_node = input_nodes[0].get_node_shared_ptr();
+        last_node->set_friendly_name(einsum_node->get_friendly_name());
+        ngraph::copy_runtime_info(einsum_node, subgraph_nodes);
+        ngraph::replace_node(einsum_node, last_node);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(einsum, matcher_name);
+    register_matcher(m, callback);
+}
diff --git a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
index 872f0bef2e3b19..bc73ab5b155696 100644
--- a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
+++ b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
@@ -12,9 +12,9 @@ function(add_graph_transformer_target TARGET_NAME STATIC_IE)
     # To avoid further TBB find_package action in next call of this function. Some version of TBB
     # has an issue with cmake config which lead to fail in case of multiple call of find_package
     # from one cmake script file.
- set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE) - set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) - set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE) + set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE) + set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) + set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # TODO: enable some day and fix all warnings diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index e6eefc809afa89..2c4d97b87f212e 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -169,9 +169,9 @@ function(ie_headers_compilation_with_custom_flags) # To avoid further TBB find_package action in next call of this function. Some version of TBB # has an issue with cmake config which lead to fail in case of multiple call of find_package # from one cmake script file. - set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE) - set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) - set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE) + set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE) + set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) + set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) set_target_properties(${target_name} PROPERTIES CXX_STANDARD ${IE_TEST_CXX_STANDARD} diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp new file mode 100644 index 00000000000000..283adb5bf45a3d --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp @@ -0,0 +1,334 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "simple_low_precision_transformer.hpp" +#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class ConvolutionBackpropDataTransformationTestValues { +public: + class Actual { + public: + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + builder::subgraph::DequantizationOperations dequantizationOnWeights; + std::shared_ptr weights; + + Actual() = default; + Actual( + const ngraph::element::Type& precisionBeforeDequantization, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, + const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, + const std::shared_ptr& weights) : + precisionBeforeDequantization(precisionBeforeDequantization), + dequantizationOnActivations(dequantizationOnActivations), + fakeQuantizeOnWeights(fakeQuantizeOnWeights), + weights(weights) {} + Actual( + const ngraph::element::Type& precisionBeforeDequantization, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, + const builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const std::shared_ptr& weights) : + 
precisionBeforeDequantization(precisionBeforeDequantization), + dequantizationOnActivations(dequantizationOnActivations), + dequantizationOnWeights(dequantizationOnWeights), + weights(weights) {} + }; + + class Expected { + public: + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + builder::subgraph::DequantizationOperations dequantizationOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + std::shared_ptr weights; + bool transformed; + }; + + ngraph::pass::low_precision::LayerTransformation::Params params; + Actual actual; + Expected expected; +}; + +typedef std::tuple< + element::Type, + ngraph::Shape, + ConvolutionBackpropDataTransformationTestValues> ConvolutionBackpropDataTransformationParams; + +class ConvolutionBackpropDataTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const auto netPrecision = std::get<0>(GetParam()); + const auto inputShape = std::get<1>(GetParam()); + auto outputShape = inputShape; + outputShape[1] /= 4; + outputShape[2] *= 2; + outputShape[3] *= 2; + auto testValues = std::get<2>(GetParam()); + + std::shared_ptr actualWeights = pass::low_precision::fold( + testValues.actual.weights, + opset1::Constant::create( + element::i64, + Shape{inputShape.size()}, + Shape{inputShape[1], outputShape[1], 1, 1})); + if (!testValues.actual.fakeQuantizeOnWeights.empty()) { + actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + outputShape, + netPrecision, + testValues.actual.fakeQuantizeOnWeights, + as_type_ptr(actualWeights)); + } else { + actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + outputShape, + netPrecision, + testValues.actual.dequantizationOnWeights, + as_type_ptr(actualWeights)); + } + + actualFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getOriginal( + testValues.actual.precisionBeforeDequantization, + netPrecision, + inputShape, + outputShape, + testValues.actual.dequantizationOnActivations, + actualWeights); + + SimpleLowPrecisionTransformer transform; + transform.add(testValues.params); + transform.transform(actualFunction); + std::shared_ptr refWeights = pass::low_precision::fold( + testValues.expected.weights, + opset1::Constant::create( + element::i64, + Shape{inputShape.size()}, + Shape{inputShape[1], outputShape[1], 1, 1})); + + if (!testValues.expected.transformed) { + refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + outputShape, + netPrecision, + testValues.actual.fakeQuantizeOnWeights, + as_type_ptr(refWeights)); + } else { + refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + outputShape, + netPrecision, + testValues.expected.dequantizationOnWeights, + as_type_ptr(refWeights)); + } + + referenceFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getReference( + testValues.expected.precisionBeforeDequantization, + netPrecision, + inputShape, + outputShape, + testValues.expected.dequantizationOnActivations, + refWeights, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const auto netPrecision = std::get<0>(obj.param); + auto inputShape = std::get<1>(obj.param); + ConvolutionBackpropDataTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << 
toString(testValues.params) << "_" << + netPrecision << "_" << + inputShape << "_" << + testValues.actual.precisionBeforeDequantization << "_" << + testValues.actual.dequantizationOnActivations << "_" << + testValues.actual.dequantizationOnWeights << "_" << + testValues.actual.fakeQuantizeOnWeights << "_" <<"_weights_" << + testValues.actual.weights->get_element_type() << "_" << "{ " << + testValues.actual.weights->cast_vector()[0] << " }_"; + return result.str(); + } +}; + +TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector netPrecisions = { + element::f32, + element::f16 +}; + +const std::vector shapes = { + ngraph::Shape({ 1, 8, 16, 16 }) +}; + +const std::vector testValues = { + // with zero point + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}}, + {}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), + true + } + }, + // updatePrecisions = false + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}}, + {}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -125.f }), + true + } + }, + // QDq version + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + {{ngraph::element::f32}, { 2.f }, { 0.01f }}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}}, + {{}, { { 2.f }, ngraph::element::f32, {1, 2, 1, 1}, true, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, {}}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1 }}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + true + } + }, + // without zero point + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, { 0.02f }}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + {}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), + true + } + }, + // QDq version + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, { 0.02f }}, + 
{{ngraph::element::f32}, {}, { 0.01f }}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + {}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, {1}}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + true + } + }, + // per-channel dequantization with the same values + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, { std::vector{0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f} }}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + {}, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), + true + } + }, + // per-channel dequantization with different values + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, { std::vector{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }}, + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }) + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, { std::vector{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }}, + {}, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + true + } + }, +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ConvolutionBackpropDataTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + ConvolutionBackpropDataTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp index c52606641c402b..75b1d965e53416 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp @@ -231,7 +231,7 @@ const std::vector testValues = { } }, - // Actual & Transformed: + // Actual: // // Parameter Constant Constant Constant // |U8 |U8 |FP32 |I8 @@ -246,6 +246,22 @@ const std::vector testValues = { // \FP32 /FP32 // \ / // Convolution + // + // Transformed: + // + // Parameter Constant + // |U8 |U8 + // | | + // Convert Convert + // \FP32 /FP32 + // \ / + // Subtract Constant + // \FP32 /FP32 + // \ / + // Multiply Constant + // \FP32 /FP32 + // \ / + // Convolution { LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), // ActualValues @@ -262,8 +278,8 @@ const std::vector testValues = { { ngraph::element::u8, {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }}, - {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }}, - { std::vector{ 2.f }, ngraph::element::f32}, + {}, + { std::vector{ -3.75f }, ngraph::element::f32}, {}, ngraph::element::f32, {} @@ -434,12 +450,8 @@ const std::vector testValues = { { {1000.f}, element::f32, {}, false }, { {0.02f}, element::f32, {}, false } }, - { 
- { ngraph::element::f32, false }, - { {127.f}, element::f32, {}, false }, - { {0.03f}, element::f32, {}, false } - }, - { std::vector{ 2.f }, ngraph::element::i8}, + {}, + { std::vector{ -3.75f }, ngraph::element::f32}, {}, ngraph::element::f32, {} diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp index 4ccbc8f412ad72..8c2d42dfbf3c98 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp @@ -160,8 +160,8 @@ const std::vector testValues = { { ngraph::element::u8, {{ ngraph::element::f32 }, { 128.f }, { 0.02f }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, ngraph::element::f32, {} } @@ -288,13 +288,13 @@ const std::vector testValues = { {{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }}, {{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}} }, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, ngraph::element::f32, {} } }, - // dequantization in second dimension + // float input { LayerTransformation::createParamsU8I8(), // ActualValues @@ -316,8 +316,8 @@ const std::vector testValues = { {{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }}, {{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}} }, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, ngraph::element::f32, {} } @@ -356,8 +356,8 @@ const std::vector testValues = { { ngraph::element::f32, {{}, {}, { {0.02f}, element::f32 }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, ngraph::element::f32, {} } @@ -396,8 +396,8 @@ const std::vector testValues = { { ngraph::element::u8, {{element::f32}, { 1000.f }, { {0.02f}, element::f32 }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, ngraph::element::f32, {} } diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp index 7ba3252999e81f..d90999bb8ccad4 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp @@ -160,8 +160,8 @@ const std::vector testValues = { { ngraph::element::u8, {{ 
ngraph::element::f32 }, { 128.f }, { 0.02f }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, {}, ngraph::element::f32, {} @@ -286,8 +286,8 @@ const std::vector testValues = { { ngraph::element::f32, {{}, {}, { 0.02f }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, {}, ngraph::element::f32, {} @@ -459,8 +459,8 @@ const std::vector testValues = { { ngraph::element::f32, {{}, {}, { 0.02f }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ -1.25f }), + {}, {}, ngraph::element::f32, {} diff --git a/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp index 16da3327ac59b9..a0f5ca24c12f8d 100644 --- a/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp @@ -3,10 +3,14 @@ // #include + +#include "common_test_utils/xml_net_builder/ir_net.hpp" #include "ngraph_reader_tests.hpp" -TEST_F(NGraphReaderTests, ReadEinsumNetwork) { - std::string model = R"V0G0N( +// since EinsumDecomposition is applied, disable these two tests +// until ngraph_reader_test checks only correctness of IR reading +TEST_F(NGraphReaderTests, DISABLED_ReadEinsumNetwork) { + std::string model = R"V0G0N( @@ -66,7 +70,7 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork) { )V0G0N"; - std::string modelV7 = R"V0G0N( + std::string modelV7 = R"V0G0N( @@ -115,11 +119,11 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork) { )V0G0N"; - compareIRs(model, modelV7); + compareIRs(model, modelV7); } -TEST_F(NGraphReaderTests, ReadEinsumNetwork2) { - std::string model = R"V0G0N( +TEST_F(NGraphReaderTests, DISABLED_ReadEinsumNetwork2) { + std::string model = R"V0G0N( @@ -199,7 +203,7 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork2) { )V0G0N"; - std::string modelV7 = R"V0G0N( + std::string modelV7 = R"V0G0N( @@ -266,6 +270,5 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork2) { )V0G0N"; - compareIRs(model, modelV7); + compareIRs(model, modelV7); } - diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp index 04d878727b3e10..cc1cbf7dff2a73 100644 --- a/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp @@ -20,6 +20,7 @@ const std::vector netPrecisions = { }; const std::vector epsilon = { + 0.0, 1e-6, 1e-5, 1e-4 diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp index afe15697f0be77..51f60318f1824a 100644 --- 
a/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp @@ -60,7 +60,7 @@ class ConvertDeconvolutionTest: public CommonTestUtils::TestsCommon, auto input = std::make_shared(ngraph::element::f32, input_shape); auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); auto conv = std::make_shared(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1), - ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0)); + ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::element::f32); return std::make_shared(ngraph::NodeVector{conv}, ngraph::ParameterVector{input}); } diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp index 2981709e5a2bbe..b0105debd1881d 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp @@ -42,7 +42,12 @@ void OPCache::update_ops_cache(const std::shared_ptr &func, co for (const auto &op : func->get_ordered_ops()) { if (ngraph::is_type(op) || ngraph::is_type(op) || - ngraph::is_type(op)) { + ngraph::is_type(op) || + // ReadValue and Assign have to be handled in pair + // Will be handled as part of 48838 + ngraph::is_type(op) || + ngraph::is_type(op) + ) { continue; } update_ops_cache(op, source_model); diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp index 5fbe3d6c89be2c..2e0d4025b41dca 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp @@ -143,7 +143,7 @@ class BasicBF16Test : public testing::WithParamInterface, InferenceEngine::SizeVector inputShapes, newInputShapes; InferenceEngine::Precision inputPrecision, netPrecision; std::map expectedPrecisions; - float threshold = 2e-2; // Is enough for tensor having abs maximum values less than 1 + float threshold = 2e-2f; // Is enough for tensor having abs maximum values less than 1 static std::string getTestCaseName(testing::TestParamInfo obj) { InferenceEngine::Precision inputPrecision, netPrecision; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp new file mode 100644 index 00000000000000..64ce304a24756f --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_backprop_data_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32 +}; + +const std::vector trasformationParamValues = { + 
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false) +}; + +const std::vector params = { + // FQ on weights + // with zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }}, + "", + "" + }, + // without zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }}, + "", + "" + }, + // with incorrect zero point on activations + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }}, + "", + "" + }, + // with incorrect zero point on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, + "", + "" + }, + // QDq on weights + // with zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // without zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, + {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // with incorrect zero point on activations + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, + {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // with incorrect zero point on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + } +}; + +const std::vector inputShapes = { + { 1, 8, 16, 16 } +}; + +const std::vector outputShapes = { + { 16, 16 } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(outputShapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + ConvolutionBackpropDataTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp index cbe867e859840e..753efd3acaa84c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp @@ -15,6 +15,7 @@ const std::vector netPrecisions = { }; const std::vector epsilon = { + 0.0, 1e-6, 1e-5, 1e-4 diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp index fcaa41f72f09d3..3dbb6f5fd539b5 100644 --- 
a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp @@ -89,4 +89,27 @@ INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizePerChannelAxis1, FakeQuantizeLayerTest ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(config)), FakeQuantizeLayerTest::getTestCaseName); + +const std::vector> inputShapesPerChannel2D = {{1, 10}}; +const std::vector> constShapesPerChannel2D = { {10}, {1, 10}, {1} }; +const auto fqParamsPerChannel2D = ::testing::Combine( + ::testing::ValuesIn(levels), + ::testing::ValuesIn(constShapesPerChannel2D), + ::testing::Values(fqArgs), + ::testing::Values(inputParams) +); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizePerChannel2D, FakeQuantizeLayerTest, + ::testing::Combine( + fqParamsPerChannel2D, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapesPerChannel2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(config)), + FakeQuantizeLayerTest::getTestCaseName); + } // namespace diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp new file mode 100644 index 00000000000000..48f28491f35ae1 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp @@ -0,0 +1,308 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include +#include +#include +#include +#include +#include + +#include "transformations/init_node_info.hpp" +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "../shared_tests_instances/skip_tests_check.hpp" + +using namespace ngraph; +using namespace ngraph::opset1; + +namespace LayerTestsDefinitions { + +enum class modelType { + TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) */ + TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => Transpose(NCHW->NHWC) */ + TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPooling => Transpose(NCHW->NHWC) (2d max pool case) */ + TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => ActivationFunction => Transpose(NCHW->NHWC) */ + TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPool => ActivationFunction => Transpose(NCHW->NHWC) */ + TranspConvTranspBcastAdd, /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS (output of MO --disable_nhwc_to_nchw option) */ + TranspConvTranspBcastAddAct /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS => AF (output of MO --disable_nhwc_to_nchw option) */ +}; + +typedef std::tuple< + InferenceEngine::SizeVector, // Kernel size + InferenceEngine::SizeVector, // Strides + std::vector, // Pad begin + std::vector, // Pad end + InferenceEngine::SizeVector, // Dilation + size_t, // Num out channels + op::PadType, // Padding type + InferenceEngine::SizeVector, // Bias + InferenceEngine::SizeVector, // Transposed Bias + 
InferenceEngine::SizeVector // Maxpool +> convSpecificParams; + +typedef std::tuple< + convSpecificParams, // Convolution parameters + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + InferenceEngine::SizeVector, // Input shapes + modelType // Test model +> padded2ValidParams; + +class Padded2ValidConvTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + convSpecificParams convParams; + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + InferenceEngine::SizeVector inputShapes; + modelType model; + std::tie(convParams, netPrecision, targetDevice, configuration, inputShapes, model) = obj.param; + op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation, bias, transpBias, maxpool; + std::vector padBegin, padEnd; + size_t convInput; + std::tie(kernel, stride, padBegin, padEnd, dilation, convInput, padType, bias, transpBias, maxpool) = convParams; + + std::ostringstream result; + result << "M=" << static_cast(model) << "_"; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << convInput << "_"; + result << "AP=" << padType << "_"; + result << "B=" << CommonTestUtils::vec2str(bias) << "_"; + result << "B=" << CommonTestUtils::vec2str(transpBias) << "_"; + result << "MP=" << CommonTestUtils::vec2str(maxpool) << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + return result.str(); + } + +protected: + void SetUp() override { + convSpecificParams convParams; + InferenceEngine::Precision netPrecision; + std::vector inputShape; + modelType model; + std::tie(convParams, netPrecision, targetDevice, configuration, inputShape, model) = this->GetParam(); + op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation, bias, transpBias, maxpool; + std::vector padBegin, padEnd; + size_t numOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, numOutChannels, padType, bias, transpBias, maxpool) = convParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + Shape bias_shape{ bias }; + Shape transp_bias_shape{ transpBias }; + Shape maxpool_shape{ maxpool }; + std::vector bias_weights{}; + + auto input = builder::makeParams(ngPrc, { inputShape }); + auto transpose_in_order = op::Constant::create(element::i64, Shape{ 4 }, { 0, 3, 1, 2 }); + auto transpose_in = std::make_shared(input[0], transpose_in_order); + auto filter_size = std::accumulate(std::begin(kernel), std::end(kernel), 1, std::multiplies()); + auto filter_weights = CommonTestUtils::generate_float_numbers(numOutChannels * inputShape[3] * filter_size, -0.5f, 0.5f); + auto conv = builder::makeConvolution(transpose_in, ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, numOutChannels, false, filter_weights); + auto transpose_out_order = op::Constant::create(element::i64, Shape{ 4 }, { 0, 2, 3, 1 }); + auto bias_const 
= builder::makeConstant(ngPrc, bias_shape, bias_weights, true); + std::shared_ptr last_op = std::make_shared(conv, transpose_out_order);; + + switch (model) { + case modelType::TranspConvBcastAddTransp: + { + auto bias = std::make_shared(conv, bias_const); + last_op = std::make_shared(bias, transpose_out_order); + } + break; + + case modelType::TranspConvBcastAddMaxPoolTransp: + { + auto bcast_add = std::make_shared(conv, bias_const); + auto maxpool = std::make_shared(bcast_add, Strides{ 1, 1 }, Shape{ 0, 0 }, Shape{ 0, 0 }, maxpool_shape); + last_op = std::make_shared(maxpool, transpose_out_order); + } + break; + + case modelType::TranspConvBcastAddActTransp: + { + auto bcast_add = std::make_shared(conv, bias_const); + auto activation = std::make_shared(bcast_add); + last_op = std::make_shared(activation, transpose_out_order); + } + break; + + case modelType::TranspConvBcastAddMaxPoolActTransp: + { + auto bcast_add = std::make_shared(conv, bias_const); + auto max_pool = std::make_shared(bcast_add, Strides{ 1, 1 }, Shape{ 0, 0 }, Shape{ 0, 0 }, maxpool_shape); + auto activation = std::make_shared(max_pool); + last_op = std::make_shared(activation, transpose_out_order); + } + break; + + case modelType::TranspConvTranspBcastAdd: + { + bias_const = std::make_shared(ngPrc, transp_bias_shape); + last_op = std::make_shared(last_op, bias_const); + } + break; + + case modelType::TranspConvTranspBcastAddAct: + { + bias_const = builder::makeConstant(ngPrc, transp_bias_shape, bias_weights, true); + auto bcast_add = std::make_shared(last_op, bias_const); + last_op = std::make_shared(bcast_add); + } + break; + + case modelType::TranspConvTransp: + default: + break; + } + + function = std::make_shared(NodeVector{ last_op }, ParameterVector{ input }); + } +}; + +class GnaPadded2Valid2DConvTest : public Padded2ValidConvTest, GnaLayerTestCheck { +protected: + void Run() override { + GnaLayerTestCheck::SkipTestCheck(); + + if (!GnaLayerTestCheck::skipTest) { + Padded2ValidConvTest::Run(); + } + } + + void SetUp() override { + Padded2ValidConvTest::SetUp(); + } +}; + +TEST_P(Padded2ValidConvTest, CompareWithRefs) { + Run(); +} + +TEST_P(GnaPadded2Valid2DConvTest, CompareWithRefs) { + Run(); +} + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + //TODO: some tests fail for FP16 + //InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "1"} + } +}; + +const std::vector padTypes = { + op::PadType::EXPLICIT, + op::PadType::SAME_LOWER, + //TODO: SAME_UPPER fails for 1d conv + //op::PadType::SAME_UPPER, + op::PadType::VALID +}; + +const std::vector models = { + modelType::TranspConvTransp, + modelType::TranspConvBcastAddTransp, + //TODO: this model fails for 1d conv + //modelType::TranspConvBcastAddMaxPoolTransp, + //TODO: disabled models fail with result comparison check + //modelType::TranspConvBcastAddActTransp, + //modelType::TranspConvBcastAddMaxPoolActTransp, + modelType::TranspConvTranspBcastAdd, + //modelType::TranspConvTranspBcastAddAct +}; + +const std::vector> input1DNHWC = { {1, 1, 16, 8} }; +const std::vector> kernels1D = { {1, 2}, {1, 3} //TODO: {1, 4} fails on result comparison for 1d conv +}; +const std::vector> strides1D = { {1, 1} }; +const std::vector> padBegins1D = { {0, 2} }; +const std::vector> padEnds1D = { {0, 3} }; +const std::vector> dilations1D = { {1, 1} }; +const std::vector numOutChannels1D = { 4 }; +const std::vector> biases1D = { {1, 4, 1, 1} }; +const std::vector> 
transp_biases1D = { {1, 1, 1, 4} }; +const std::vector> maxpools1D = { {1, 2} }; + +const std::vector> input2DNHWC = { {1, 16, 16, 32} }; +const std::vector> kernels2D = { {2, 2}, {4, 1}, {1, 3} }; +//TODO: strides other than {1, 1} fail on result comparison for 2d conv +const std::vector> strides2D = { {1, 1} }; +const std::vector> padBegins2D = { {1, 2} }; +const std::vector> padEnds2D = { {3, 1} }; +const std::vector> dilations2D = { {1, 1} }; +const std::vector numOutChannels2D = { 32 }; +const std::vector> biases2D = { {1, 32, 1, 1} }; +const std::vector> transp_biases2D = { {1, 1, 1, 32} }; +const std::vector> maxpools2D = { {2, 2} }; + +const auto conv1DParams = ::testing::Combine( + ::testing::ValuesIn(kernels1D), + ::testing::ValuesIn(strides1D), + ::testing::ValuesIn(padBegins1D), + ::testing::ValuesIn(padEnds1D), + ::testing::ValuesIn(dilations1D), + ::testing::ValuesIn(numOutChannels1D), + ::testing::ValuesIn(padTypes), + ::testing::ValuesIn(biases1D), + ::testing::ValuesIn(transp_biases1D), + ::testing::ValuesIn(maxpools1D) +); + +const auto conv2DParams = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels2D), + ::testing::ValuesIn(padTypes), + ::testing::ValuesIn(biases2D), + ::testing::ValuesIn(transp_biases2D), + ::testing::ValuesIn(maxpools2D) +); + +INSTANTIATE_TEST_CASE_P(smoke_1DTranspConvTransp, Padded2ValidConvTest, + ::testing::Combine( + conv1DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(input1DNHWC), + ::testing::ValuesIn(models)), + Padded2ValidConvTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_2DTranspConvTransp, GnaPadded2Valid2DConvTest, + ::testing::Combine( + conv2DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(input2DNHWC), + ::testing::ValuesIn(models)), + GnaPadded2Valid2DConvTest::getTestCaseName); + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp index 2d502168f46d42..62d2a94ffa81da 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp @@ -19,7 +19,9 @@ namespace { const std::vector> Inconfigs = { {{InferenceEngine::GNAConfigParams::KEY_GNA_SCALE_FACTOR, "1.0"}}, {{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I8"}}, - {{InferenceEngine::GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE, "gfile"}}, + {{InferenceEngine::GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE, "gfile"}, + {InferenceEngine::GNAConfigParams::KEY_GNA_EXEC_TARGET, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0}, + {InferenceEngine::GNAConfigParams::KEY_GNA_COMPILE_TARGET, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0}}, {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, InferenceEngine::GNAConfigParams::GNA_AUTO}}, {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, InferenceEngine::GNAConfigParams::GNA_SW_FP32}}, {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, 
InferenceEngine::GNAConfigParams::GNA_SW}}, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp new file mode 100644 index 00000000000000..d33e3c42f9e242 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_backprop_data_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32, + ngraph::element::f16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false) +}; + +const std::vector params = { + // FQ on weights + // with zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }}, + "", + "" + }, + // without zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }}, + "", + "" + }, + // TODO: check fails in CI +// // with incorrect zero point on activations +// { +// {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, +// {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }}, +// "", +// "" +// }, +// // with incorrect zero point on weights +// { +// {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, +// {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, +// "", +// "" +// }, + // QDq on weights + // with zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // without zero point + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }}, + {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // with incorrect zero point on activations + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }}, + {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + }, + // with incorrect zero point on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }}, + {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }}, + "", + "" + } +}; + +const std::vector inputShapes = { + { 1, 8, 16, 16 }, + { 1, 32, 16, 16 } +}; + +const std::vector outputShapes = { + { 16, 16 } +}; + +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(outputShapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + 
::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + ConvolutionBackpropDataTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp index c498d7963e36b7..0f1704601e7baa 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -111,7 +111,7 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessGetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -182,7 +182,7 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessSetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -243,7 +243,7 @@ TEST_P(PreprocessTest, SetMeanValuePreProcessGetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -308,7 +308,7 @@ TEST_P(PreprocessTest, SetMeanValuePreProcessSetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -363,7 +363,7 @@ TEST_P(PreprocessTest, ReverseInputChannelsPreProcessGetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -430,7 +430,7 @@ TEST_P(PreprocessTest, ReverseInputChannelsPreProcessSetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -500,7 +500,7 @@ TEST_P(PreprocessTest, SetScalePreProcessGetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -566,7 +566,7 @@ TEST_P(PreprocessTest, SetScalePreProcessSetBlob) { auto lockedMem = inBlob->buffer(); auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[i] = i; + inData[i] = static_cast(i); } req.Infer(); @@ -721,11 +721,11 @@ TEST_P(PreprocessConversionTest, Infer) { if (iPrecision == InferenceEngine::Precision::FP32) { auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[desc.offset(i)] = i; + inData[desc.offset(i)] = static_cast(i); } else if (iPrecision == InferenceEngine::Precision::U8) { auto *inData = lockedMem.as(); for (size_t i = 0; i < inBlob->size(); i++) - inData[desc.offset(i)] = i; + inData[desc.offset(i)] = static_cast(i); } else { ASSERT_TRUE(false); } diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp new file mode 100644 index 00000000000000..39d5ea583916e5 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp @@ -0,0 
+1,65 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + + +#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" + +namespace LayerTestsDefinitions { + +class ConvolutionBackpropDataTransformationParam { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData; + ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights; + std::string layerName; + std::string expectedKernelType; + + ConvolutionBackpropDataTransformationParam() = default; + ConvolutionBackpropDataTransformationParam( + const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData, + const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, + std::string layerName, + std::string expectedKernelType) : + fakeQuantizeOnData(fakeQuantizeOnData), fakeQuantizeOnWeights(fakeQuantizeOnWeights), + layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {} + ConvolutionBackpropDataTransformationParam( + const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData, + ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights, + std::string layerName, + std::string expectedKernelType) : + fakeQuantizeOnData(fakeQuantizeOnData), dequantizationOnWeights(std::move(dequantizationOnWeights)), + layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {} +}; + +typedef std::tuple< + ngraph::element::Type, // netPrecision + ngraph::Shape, // inputShape + ngraph::Shape, // outputShape + std::string, // targetDevice + ngraph::pass::low_precision::LayerTransformation::Params, + ConvolutionBackpropDataTransformationParam +> ConvolutionBackpropDataTransformationParams; + +class ConvolutionBackpropDataTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + + void Run() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp index 31e07b5c962edf..df964b76887eec 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp @@ -30,7 +30,7 @@ TEST_P(MultipleAllocations, InferWorksCorrectAfterAllocations) { auto ie = PluginCache::get().ie(); std::cout << "Load the network " << m_allocationsCount << " times..." 
<< std::flush; - for (int i = 0; i < m_allocationsCount; ++i) { + for (unsigned int i = 0; i < m_allocationsCount; ++i) { ie->LoadNetwork(cnnNet, targetDevice, configuration); } diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp new file mode 100644 index 00000000000000..951af4fdd4e0e0 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/convolution_backprop_data_transformation.hpp" + +#include +#include +#include + +#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp" + +namespace LayerTestsDefinitions { + +std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type netPrecision; + ngraph::Shape inputShape; + ngraph::Shape outputShape; + std::string targetDevice; + ngraph::pass::low_precision::LayerTransformation::Params params; + ConvolutionBackpropDataTransformationParam param; + std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = obj.param; + + std::ostringstream result; + result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" << + outputShape << "_" << + param.fakeQuantizeOnData << "_" << + param.fakeQuantizeOnWeights << "_" << + param.dequantizationOnWeights; + return result.str(); +} + +void ConvolutionBackpropDataTransformation::SetUp() { + threshold = 0.1f; + + ngraph::element::Type netPrecision; + ngraph::Shape inputShape; + ngraph::Shape outputShape; + ngraph::pass::low_precision::LayerTransformation::Params params; + ConvolutionBackpropDataTransformationParam param; + std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam(); + + std::shared_ptr weights; + + if (!param.fakeQuantizeOnWeights.empty()) { + weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1}, + netPrecision, + param.fakeQuantizeOnWeights); + } else { + weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( + ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1}, + netPrecision, + param.dequantizationOnWeights); + } + + function = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::get( + netPrecision, + inputShape, + outputShape, + param.fakeQuantizeOnData, + weights); +} + +void ConvolutionBackpropDataTransformation::Run() { + LayerTestsCommon::Run(); + + const auto params = std::get<5>(GetParam()); + const auto actualType = getRuntimePrecision(params.layerName); + EXPECT_EQ(actualType, params.expectedKernelType); +} + +TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp index fdfc95c0226e0c..0189de6a543fc2 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp @@ -22,9 +22,6 @@ #include #include #include - -#include 
"details/ie_exception.hpp" - namespace { inline namespace tools { bool isTypeRelaxed(const std::string &type) { diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp index e58fa5edb9f455..929a0e5452fa70 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -13,6 +14,7 @@ #include #include +#include "ie_common.h" #include "test_common.hpp" #define DYN ngraph::Dimension::dynamic() @@ -569,7 +571,11 @@ struct Equal { if (lhs_bit_size != rhs_bit_size) return false; for (size_t bit_idx = 0; bit_idx < lhs_bit_size; bit_idx++) { - const uint8_t byte_idx = bit_idx / BITS_IN_BYTE_COUNT; + const auto byte_idx_result(bit_idx / BITS_IN_BYTE_COUNT); + if (byte_idx_result > std::numeric_limits::max()) + IE_THROW() << "(bit_idx / BITS_IN_BYTE_COUNT) bigger than uint8_t::max_value"; + + const uint8_t byte_idx(static_cast(byte_idx_result)); const uint8_t bit_in_byte_idx = 7 - (bit_idx % BITS_IN_BYTE_COUNT); if (extract_bit(lhs[byte_idx], bit_in_byte_idx) != diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py index f874f01ebab658..d17c60b6b79a08 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py @@ -28,7 +28,11 @@ def aggregate_test_results(results: ET.SubElement, xml_reports: list): timestamp = None for xml in xml_reports: logger.info(f" Processing: {xml}") - xml_root = ET.parse(xml).getroot() + try: + xml_root = ET.parse(xml).getroot() + except ET.ParseError: + logger.error(f' {xml} is corrupted and skipped') + continue xml_timestamp = xml_root.get("timestamp") if (timestamp is None) or (xml_timestamp < timestamp): timestamp = xml_timestamp diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py index 9ba9dea8a98373..519cef7d01ac04 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py @@ -22,9 +22,11 @@ def parse_arguments(): report is be kept. 
""" out_help = "Path where to save html report" + report_tag = "Report tag" parser.add_argument("--xml", help=xml_help, nargs="*", required=True) parser.add_argument("--out", help=out_help, default="") + parser.add_argument("--report_tag", help=report_tag, default="") return parser.parse_args() @@ -137,7 +139,7 @@ def collect_statistic(root: ET.Element): return devices, results, general_pass_rate, pass_rate_avg, general_test_count, trusted_ops -def create_summary(summary_root: ET.Element, output_folder: str): +def create_summary(summary_root: ET.Element, output_folder: str, report_tag: str): device_list, results, general_pass_rate, pass_rate_avg, general_test_count, trusted_ops = \ collect_statistic(summary_root) @@ -157,7 +159,7 @@ def create_summary(summary_root: ET.Element, output_folder: str): res_summary = template.render(ordered_ops=op_list, devices=device_list, results=results, timestamp=timestamp, general_pass_rate=general_pass_rate, pass_rate_avg=pass_rate_avg, verified_operations=verified_operations, trusted_ops=trusted_ops, - general_test_count=general_test_count) + general_test_count=general_test_count, report_tag=report_tag) report_path = os.path.join(output_folder, "report.html") with open(report_path, "w") as f: @@ -168,4 +170,4 @@ def create_summary(summary_root: ET.Element, output_folder: str): if __name__ == "__main__": args = parse_arguments() summary_root = merge_xmls(args.xml) - create_summary(summary_root, args.out) + create_summary(summary_root, args.out, args.report_tag) diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html index 6eeeba16b1a787..26f0923e144fb2 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html @@ -26,8 +26,9 @@ integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"> --> +
-    Operations coverage summary {{ timestamp }}
+    Operations coverage summary: {{report_tag}} {{ timestamp }}
@@ -42,7 +43,7 @@
     Operations coverage summary {{ timestamp }}
- + {% for d in devices -%} diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp new file mode 100644 index 00000000000000..fa05d7b3cb18cd --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" + +namespace ngraph { +namespace builder { +namespace subgraph { + +class ConvolutionBackpropDataFunction { +public: + static std::shared_ptr getWeights( + const Shape& shape, + const element::Type& netPrecision, + const builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const std::shared_ptr& value = nullptr); + static std::shared_ptr getWeights( + const Shape& shape, + const element::Type& netPrecision, + const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights, + const std::shared_ptr& value = nullptr); + static std::shared_ptr get( + const element::Type netPrecision, + const Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::FakeQuantizeOnData& fqOnData, + const std::shared_ptr& weights); + static std::shared_ptr getOriginal( + const element::Type precision, + const element::Type netPrecision, + const Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::DequantizationOperations& dequantization, + const std::shared_ptr& weights); + static std::shared_ptr getReference( + const element::Type precision, + const element::Type netPrecision, + const Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::DequantizationOperations& dequantization, + const std::shared_ptr& weights, + const builder::subgraph::DequantizationOperations& dequantizationAfter); +}; +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp new file mode 100644 index 00000000000000..ae7d3847f69866 --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp" + +#include +#include +#include "ngraph_functions/subgraph_builders.hpp" +#include "low_precision/network_helper.hpp" + +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" +#include "low_precision/common/dequantization_op.hpp" +#include "low_precision/network_helper.hpp" + +using namespace ngraph::pass::low_precision; + +namespace ngraph { +namespace builder { +namespace subgraph { + +std::shared_ptr ConvolutionBackpropDataFunction::get( + const element::Type netPrecision, + const 
Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::FakeQuantizeOnData& fqOnData, + const std::shared_ptr& weights) { + const auto input = std::make_shared(netPrecision, inputShape); + const auto fq = makeFakeQuantize(input, netPrecision, fqOnData); + + auto convolutionBackpropData = std::make_shared( + fq, + weights, + Strides{ 1, 1 }, + CoordinateDiff{ 0, 0 }, + CoordinateDiff{ 0, 0 }, + Strides{ 1, 1 }); + + ngraph::ResultVector results{ std::make_shared(convolutionBackpropData) }; + return std::make_shared(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation"); +} + +std::shared_ptr ConvolutionBackpropDataFunction::getWeights( + const Shape& shape, + const element::Type& netPrecision, + const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights, + const std::shared_ptr& value) { + const auto weights = value != nullptr ? + value : + std::make_shared( + element::i8, + shape, + std::vector(shape_size(shape), 1)); + const auto convert = std::make_shared(weights, netPrecision); + OutputVector convertedOutput(1); + convert->constant_fold(convertedOutput, convert->input_values()); + const auto convertedWeights = convertedOutput[0].get_node_shared_ptr(); + const auto fq = makeFakeQuantize(convertedWeights, netPrecision, fqOnWeights); + + return fq; +} + +std::shared_ptr ConvolutionBackpropDataFunction::getWeights( + const Shape& shape, + const element::Type& netPrecision, + const builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const std::shared_ptr& value) { + auto weights = + value != nullptr ? + value : + std::make_shared( + element::i8, + shape, + std::vector(shape_size(shape), 1)); + auto dequantizationStructure = dequantizationOnWeights; + dequantizationStructure.setPrecision(netPrecision); + if (!dequantizationOnWeights.subtract.constantPrecision.is_real()) { + dequantizationStructure.subtract.constantPrecision = dequantizationOnWeights.subtract.constantPrecision; + } + if (weights->get_element_type().is_real()) { + weights = as_type_ptr(fold(weights, netPrecision)); + } + const auto dq = makeDequantization(weights, dequantizationStructure); + + return dq; +} + +std::shared_ptr ConvolutionBackpropDataFunction::getOriginal( + const element::Type precision, + const element::Type netPrecision, + const Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::DequantizationOperations& dequantization, + const std::shared_ptr& weights) { + const auto input = std::make_shared(precision, inputShape); + auto dequantizationStructure = dequantization; + dequantizationStructure.multiply.outPrecision = netPrecision; + const auto activations = makeDequantization(input, dequantizationStructure); + + auto convolutionBackpropData = std::make_shared( + activations, + weights, + Strides{ 1, 1 }, + CoordinateDiff{ 0, 0 }, + CoordinateDiff{ 0, 0 }, + Strides{ 1, 1 }); + + convolutionBackpropData->set_friendly_name("output"); + ngraph::ResultVector results{ std::make_shared(convolutionBackpropData) }; + return std::make_shared(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation"); +} + +std::shared_ptr ConvolutionBackpropDataFunction::getReference( + const element::Type precision, + const element::Type netPrecision, + const Shape& inputShape, + const Shape& outputShape, + const builder::subgraph::DequantizationOperations& dequantization, + const std::shared_ptr& weights, + const builder::subgraph::DequantizationOperations& dequantizationAfter) { + const auto input = std::make_shared(precision, 
inputShape); + auto dequantizationStructure = dequantization; + dequantizationStructure.multiply.outPrecision = netPrecision; + const auto activations = makeDequantization(input, dequantizationStructure); + + auto convolutionBackpropData = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ dequantizationAfter.empty() ? netPrecision : element::f32 }, + ngraph::op::TemporaryReplaceOutputType(activations, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(), + Strides{ 1, 1 }, + CoordinateDiff{ 0, 0 }, + CoordinateDiff{ 0, 0 }, + Strides{ 1, 1 }); + + auto dequantizationStructureAfter = dequantizationAfter; + dequantizationStructureAfter.multiply.outPrecision = netPrecision; + const auto result = makeDequantization(convolutionBackpropData, dequantizationStructureAfter); + result->set_friendly_name("output"); + ngraph::ResultVector results{ std::make_shared(result) }; + return std::make_shared(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation"); +} + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 58b4c598fd83a4..18d7724add571a 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -114,7 +114,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE # dynamic libraries inference_engine_transformations inference_engine_lp_transformations - inference_engine_snippets ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py index ee8905356acd1a..a363a1ca250756 100644 --- a/model-optimizer/mo/back/offline_transformations.py +++ b/model-optimizer/mo/back/offline_transformations.py @@ -35,6 +35,7 @@ def apply_offline_transformations(input_model: str, framework: str, transforms: available_transformations[name](net, **args) + ApplyMOCTransformations(net, False) net.serialize(input_model + ".xml", input_model + ".bin") path_to_mapping = input_model + ".mapping" GenerateMappingFile(net, path_to_mapping.encode('utf-8'), extract_names) diff --git a/ngraph/core/include/ngraph/op/constant.hpp b/ngraph/core/include/ngraph/op/constant.hpp index 194d9d205df4cc..650651a37a7059 100644 --- a/ngraph/core/include/ngraph/op/constant.hpp +++ b/ngraph/core/include/ngraph/op/constant.hpp @@ -646,24 +646,26 @@ namespace ngraph } template < ngraph::element::Type_t Type, + typename ValueT, typename std::enable_if::type = true> - static ngraph::fundamental_type_for - value_in_range(const ngraph::fundamental_type_for& value) + static ngraph::fundamental_type_for value_in_range(const ValueT& value) { - NGRAPH_CHECK(0 <= value && value <= 15, + const auto result = ngraph::fundamental_type_for(value); + NGRAPH_CHECK(0 <= result && result <= 15, "assigned value out of range u4 values"); - return value; + return result; } template < ngraph::element::Type_t Type, + typename ValueT, typename std::enable_if::type = true> - static ngraph::fundamental_type_for - value_in_range(const ngraph::fundamental_type_for& value) + static ngraph::fundamental_type_for value_in_range(const ValueT& value) { - NGRAPH_CHECK(-8 <= value && value <= 7, + const auto result = ngraph::fundamental_type_for(value); + NGRAPH_CHECK(-8 <= result && result <= 7, "assigned value out of range i4 values"); - return value; + return result; } bool 
are_all_data_elements_bitwise_identical() const; diff --git a/ngraph/core/include/ngraph/op/convolution.hpp b/ngraph/core/include/ngraph/op/convolution.hpp index c6516a5572a07f..72a365be533705 100644 --- a/ngraph/core/include/ngraph/op/convolution.hpp +++ b/ngraph/core/include/ngraph/op/convolution.hpp @@ -86,8 +86,8 @@ namespace ngraph class NGRAPH_API ConvolutionBackpropData : public Op { public: - static constexpr NodeTypeInfo type_info{"ConvolutionBackpropData", 1}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + NGRAPH_RTTI_DECLARATION; + /// \brief Constructs a batched-convolution data batch-backprop operation. ConvolutionBackpropData() = default; // clang-format off diff --git a/ngraph/core/include/ngraph/op/einsum.hpp b/ngraph/core/include/ngraph/op/einsum.hpp index 08f066823e9bed..37d1bf482a9b5c 100644 --- a/ngraph/core/include/ngraph/op/einsum.hpp +++ b/ngraph/core/include/ngraph/op/einsum.hpp @@ -38,6 +38,12 @@ namespace ngraph std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + /// \brief Get an equation of Einsum operation + /// + /// \return Einsum equation + /// + std::string get_equation() const { return m_equation; } + /// \brief Check correctness of equation format and extract input subscripts /// and output subscript /// diff --git a/ngraph/core/include/ngraph/op/group_conv.hpp b/ngraph/core/include/ngraph/op/group_conv.hpp index 3c175b512976ff..ebda0392d96571 100644 --- a/ngraph/core/include/ngraph/op/group_conv.hpp +++ b/ngraph/core/include/ngraph/op/group_conv.hpp @@ -85,8 +85,8 @@ namespace ngraph class NGRAPH_API GroupConvolutionBackpropData : public Op { public: - static constexpr NodeTypeInfo type_info{"GroupConvolutionBackpropData", 1}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + NGRAPH_RTTI_DECLARATION; + /// \brief Constructs a batched-convolution data batch-backprop operation. GroupConvolutionBackpropData(); // clang-format off diff --git a/ngraph/core/src/op/batch_norm.cpp b/ngraph/core/src/op/batch_norm.cpp index 1f772cf67daa3a..57a4ce7f6f7568 100644 --- a/ngraph/core/src/op/batch_norm.cpp +++ b/ngraph/core/src/op/batch_norm.cpp @@ -42,8 +42,8 @@ void op::v0::BatchNormInference::validate_and_infer_types() NODE_VALIDATION_CHECK( this, - m_epsilon > 0, - "Attribute 'epsilon' must have non-zero positive floating-point value. Got: ", + m_epsilon >= 0, + "Attribute 'epsilon' must be a floating-point value greater than or equal to zero. Got: ", m_epsilon); set_output_size(1); @@ -102,8 +102,8 @@ void op::v5::BatchNormInference::validate_and_infer_types() NODE_VALIDATION_CHECK( this, - m_epsilon > 0, - "Attribute 'epsilon' must have non-zero positive floating-point value. Got: ", + m_epsilon >= 0, + "Attribute 'epsilon' must be a floating-point value greater than or equal to zero. 
Got: ", m_epsilon); set_output_size(1); diff --git a/ngraph/core/src/op/convolution.cpp b/ngraph/core/src/op/convolution.cpp index 667fa933046a1d..6be59d5132e7ff 100644 --- a/ngraph/core/src/op/convolution.cpp +++ b/ngraph/core/src/op/convolution.cpp @@ -102,12 +102,14 @@ shared_ptr op::v1::Convolution::clone_with_new_inputs(const OutputVector& m_auto_pad); } -constexpr NodeTypeInfo op::v1::ConvolutionBackpropData::type_info; shared_ptr op::v1::Convolution::get_default_value() const { return ngraph::make_constant_from_string("0", get_element_type(), get_shape()); } +// *** ConvolutionBackpropData OP SET 1 *** +NGRAPH_RTTI_DEFINITION(op::v1::ConvolutionBackpropData, "ConvolutionBackpropData", 1); + op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output& data, const Output& filters, const Output& output_shape, diff --git a/ngraph/core/src/op/group_conv.cpp b/ngraph/core/src/op/group_conv.cpp index 4efbcae117e00a..b9d7cc4be10148 100644 --- a/ngraph/core/src/op/group_conv.cpp +++ b/ngraph/core/src/op/group_conv.cpp @@ -286,7 +286,7 @@ shared_ptr op::v1::GroupConvolution::clone_with_new_inputs(const OutputVec // v1::GroupConvolutionBackpropData //------------------------------------------------------------------------------ -constexpr NodeTypeInfo op::v1::GroupConvolutionBackpropData::type_info; +NGRAPH_RTTI_DEFINITION(op::v1::GroupConvolutionBackpropData, "GroupConvolutionBackpropData", 1); op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData() : Op() diff --git a/ngraph/test/type_prop/batch_norm.cpp b/ngraph/test/type_prop/batch_norm.cpp index 13abbdf5a4f942..8a1fafd95b62e6 100644 --- a/ngraph/test/type_prop/batch_norm.cpp +++ b/ngraph/test/type_prop/batch_norm.cpp @@ -497,29 +497,20 @@ TYPED_TEST_P(BatchNormTest, batch_norm_inference_invalid_epsilon) {inputs_et, PartialShape{100}, "variance"} }; - double eps_zero = 0.0; double eps_neg = -1.0; - - const std::vector bn_tests{ - BatchNormInferParams{inputs_et, data_batch_shape, ch_inputs, eps_zero}, - BatchNormInferParams{inputs_et, data_batch_shape, ch_inputs, eps_neg} - }; - - for(const auto& params : bn_tests) + const BatchNormInferParams params{inputs_et, data_batch_shape, ch_inputs, eps_neg}; + try { - try - { - auto bn = makeBatchNormOp(params); - FAIL() << "Invalid 'epsilon' attribute value not detected"; - } - catch (const NodeValidationFailure& error) - { - EXPECT_HAS_SUBSTRING(error.what(), "Attribute 'epsilon' must have non-zero positive floating-point value."); - } - catch (...) - { - FAIL() << "Positive 'epsilon' attribute value check failed for unexpected reason"; - } + auto bn = makeBatchNormOp(params); + FAIL() << "Invalid 'epsilon' attribute value not detected"; + } + catch (const NodeValidationFailure& error) + { + EXPECT_HAS_SUBSTRING(error.what(), "Attribute 'epsilon' must be a floating-point value greater than or equal to zero."); + } + catch (...) + { + FAIL() << "Non-negative 'epsilon' attribute value check failed for unexpected reason"; } } @@ -542,4 +533,4 @@ REGISTER_TYPED_TEST_CASE_P( batch_norm_inference_invalid_epsilon); using Types = ::testing::Types; -INSTANTIATE_TYPED_TEST_CASE_P(type_prop, BatchNormTest, Types, ); +INSTANTIATE_TYPED_TEST_CASE_P(type_prop, BatchNormTest, Types);
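Note on the batch_norm.cpp and type_prop/batch_norm.cpp hunks above: the 'epsilon' validation is relaxed from strictly positive to non-negative. A minimal usage sketch (not part of the patch), assuming the v5 constructor order (data, gamma, beta, mean, variance, epsilon): epsilon == 0.0 now passes validation, while a negative epsilon still raises NodeValidationFailure.

// Hedged illustration only; names and shapes are arbitrary examples.
#include <memory>
#include <ngraph/ngraph.hpp>

using namespace ngraph;

std::shared_ptr<op::v5::BatchNormInference> make_bn(double epsilon) {
    auto data     = std::make_shared<op::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto gamma    = std::make_shared<op::Parameter>(element::f32, Shape{3});
    auto beta     = std::make_shared<op::Parameter>(element::f32, Shape{3});
    auto mean     = std::make_shared<op::Parameter>(element::f32, Shape{3});
    auto variance = std::make_shared<op::Parameter>(element::f32, Shape{3});
    // With this change, epsilon == 0.0 is accepted by validate_and_infer_types();
    // epsilon < 0 still throws NodeValidationFailure.
    return std::make_shared<op::v5::BatchNormInference>(data, gamma, beta, mean, variance, epsilon);
}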