From 02b31b2cc71509bb1fcc967b88dd5760740a2a27 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@intel.com>
Date: Mon, 17 May 2021 21:02:56 +0300
Subject: [PATCH 01/16] Follow-up for PR 5638 (#5659)

---
 cmake/developer_package/download/dependency_solver.cmake    | 6 +++---
 inference-engine/cmake/ie_parallel.cmake                    | 6 +++---
 inference-engine/src/vpu/graph_transformer/CMakeLists.txt   | 6 +++---
 .../tests/functional/inference_engine/CMakeLists.txt        | 6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/cmake/developer_package/download/dependency_solver.cmake b/cmake/developer_package/download/dependency_solver.cmake
index 9038f610035860..2f2ab192d3e82d 100644
--- a/cmake/developer_package/download/dependency_solver.cmake
+++ b/cmake/developer_package/download/dependency_solver.cmake
@@ -176,9 +176,9 @@ function(reset_deps_cache)
         foreach(var_name IN LISTS ARGN)
             unset(${var_name} CACHE)
         endforeach()
-        # foreach(var_name IN LISTS ARGN)
-        #     unset(ENV{${var_name}})
-        # endforeach()
+        foreach(var_name IN LISTS ARGN)
+            unset(ENV{${var_name}})
+        endforeach()
     endif()
 endfunction()
 
diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake
index 958ea9b23a74ca..a4960ce3430d8d 100644
--- a/inference-engine/cmake/ie_parallel.cmake
+++ b/inference-engine/cmake/ie_parallel.cmake
@@ -25,9 +25,9 @@ function(set_ie_threading_interface_for TARGET_NAME)
         else()
             find_dependency(TBB COMPONENTS tbb tbbmalloc)
         endif()
-        set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
-        set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
-        set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+        set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
+        set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+        set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
         if (NOT TBB_FOUND)
             ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\
                                 SEQ method will be used.")
diff --git a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
index 872f0bef2e3b19..bc73ab5b155696 100644
--- a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
+++ b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
@@ -12,9 +12,9 @@ function(add_graph_transformer_target TARGET_NAME STATIC_IE)
     # To avoid further TBB find_package action in next call of this function. Some version of TBB
     # has an issue with cmake config which lead to fail in case of multiple call of find_package
     # from one cmake script file.
-    set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
-    set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
-    set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+    set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
+    set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+    set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
 
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         # TODO: enable some day and fix all warnings
diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt
index e6eefc809afa89..2c4d97b87f212e 100644
--- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt
+++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt
@@ -169,9 +169,9 @@ function(ie_headers_compilation_with_custom_flags)
     # To avoid further TBB find_package action in next call of this function. Some version of TBB
     # has an issue with cmake config which lead to fail in case of multiple call of find_package
     # from one cmake script file.
-    set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
-    set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
-    set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+    set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
+    set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+    set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
 
     set_target_properties(${target_name} PROPERTIES
                           CXX_STANDARD ${IE_TEST_CXX_STANDARD}

From 71b87255abb2dda1582f2a959992d1b2388de7aa Mon Sep 17 00:00:00 2001
From: Mikhail Treskin <mikhail.treskin@intel.com>
Date: Mon, 17 May 2021 22:47:29 +0300
Subject: [PATCH 02/16] Skip Assign and ReadValue from extractor. (#5663)

---
 .../plugin/conformance/subgraphs_dumper/src/ops_cache.cpp  | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp
index 2981709e5a2bbe..b0105debd1881d 100644
--- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp
+++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp
@@ -42,7 +42,12 @@ void OPCache::update_ops_cache(const std::shared_ptr<ngraph::Function> &func, co
     for (const auto &op : func->get_ordered_ops()) {
         if (ngraph::is_type<ngraph::op::Parameter>(op) ||
             ngraph::is_type<ngraph::op::Constant>(op) ||
-            ngraph::is_type<ngraph::op::Result>(op)) {
+            ngraph::is_type<ngraph::op::Result>(op) ||
+            // ReadValue and Assign have to be handled in pair
+            // Will be handled as part of 48838
+            ngraph::is_type<ngraph::op::AssignBase>(op) ||
+            ngraph::is_type<ngraph::op::ReadValueBase>(op)
+                    ) {
             continue;
         }
         update_ops_cache(op, source_model);

From 4d6d088c02b7fd597dc7d7f7a705f1d0a340bb03 Mon Sep 17 00:00:00 2001
From: Irina Efode <irina.efode@intel.com>
Date: Mon, 17 May 2021 23:08:42 +0300
Subject: [PATCH 03/16] [IE TESTS] Add tag to the report (for identification
 scope) && add handling of exception in merge_xmls (#5660)

---
 .../layer_tests_summary/merge_xmls.py                     | 6 +++++-
 .../layer_tests_summary/summarize.py                      | 8 +++++---
 .../layer_tests_summary/template/report_template.html     | 5 +++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py
index f874f01ebab658..d17c60b6b79a08 100644
--- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py
+++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py
@@ -28,7 +28,11 @@ def aggregate_test_results(results: ET.SubElement, xml_reports: list):
     timestamp = None
     for xml in xml_reports:
         logger.info(f" Processing: {xml}")
-        xml_root = ET.parse(xml).getroot()
+        try:
+            xml_root = ET.parse(xml).getroot()
+        except ET.ParseError:
+            logger.error(f' {xml} is corrupted and skipped')
+            continue
         xml_timestamp = xml_root.get("timestamp")
         if (timestamp is None) or (xml_timestamp < timestamp):
             timestamp = xml_timestamp
diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py
index 9ba9dea8a98373..519cef7d01ac04 100644
--- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py
+++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py
@@ -22,9 +22,11 @@ def parse_arguments():
         report is be kept.
     """
     out_help = "Path where to save html report"
+    report_tag = "Report tag"
 
     parser.add_argument("--xml", help=xml_help, nargs="*", required=True)
     parser.add_argument("--out", help=out_help, default="")
+    parser.add_argument("--report_tag", help=report_tag, default="")
 
     return parser.parse_args()
 
@@ -137,7 +139,7 @@ def collect_statistic(root: ET.Element):
     return devices, results, general_pass_rate, pass_rate_avg, general_test_count, trusted_ops
 
 
-def create_summary(summary_root: ET.Element, output_folder: str):
+def create_summary(summary_root: ET.Element, output_folder: str, report_tag: str):
     device_list, results, general_pass_rate, pass_rate_avg, general_test_count, trusted_ops = \
         collect_statistic(summary_root)
 
@@ -157,7 +159,7 @@ def create_summary(summary_root: ET.Element, output_folder: str):
     res_summary = template.render(ordered_ops=op_list, devices=device_list, results=results, timestamp=timestamp,
                                   general_pass_rate=general_pass_rate, pass_rate_avg=pass_rate_avg,
                                   verified_operations=verified_operations, trusted_ops=trusted_ops,
-                                  general_test_count=general_test_count)
+                                  general_test_count=general_test_count, report_tag=report_tag)
 
     report_path = os.path.join(output_folder, "report.html")
     with open(report_path, "w") as f:
@@ -168,4 +170,4 @@ def create_summary(summary_root: ET.Element, output_folder: str):
 if __name__ == "__main__":
     args = parse_arguments()
     summary_root = merge_xmls(args.xml)
-    create_summary(summary_root, args.out)
+    create_summary(summary_root, args.out, args.report_tag)
diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html
index 6eeeba16b1a787..26f0923e144fb2 100644
--- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html
+++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/report_template.html
@@ -26,8 +26,9 @@
         integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl"
         crossorigin="anonymous"></script>
 -->
+
 <hr class="my-4">
-<h2>Operations coverage summary {{ timestamp }}</h2>
+<h2>Operations coverage summary: {{report_tag}} {{ timestamp }}</h2>
 <hr class="my-4">
 <table class="table table-hover" id="legend">
     <thead>
@@ -42,7 +43,7 @@ <h2>Operations coverage summary {{ timestamp }}</h2>
     </thead>
 </table>
 <table class="table table-hover" id="report">
-    <thead style="position: sticky;top: 0">
+    <thead style="position: sticky; top: 0">
     <tr>
         <th class="table-primary" scope="col">Operation</th>
         {% for d in devices -%}

From f84b25722ca68a5f4fdb3cc5b4ca3f7c1c95db05 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@intel.com>
Date: Tue, 18 May 2021 00:24:37 +0300
Subject: [PATCH 04/16] Removed legacy dependency on snippets (#5656)

---
 inference-engine/src/inference_engine/CMakeLists.txt   |  1 -
 inference-engine/src/legacy_api/CMakeLists.txt         |  3 +--
 .../legacy_api/src/convert_function_to_cnn_network.cpp | 10 ----------
 inference-engine/src/snippets/CMakeLists.txt           |  9 +++++----
 inference-engine/tests_deprecated/unit/CMakeLists.txt  |  1 -
 5 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index 99dfa1b64010f2..68e0f131721af4 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -201,7 +201,6 @@ if(WIN32)
 endif()
 
 target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES}
-                                               inference_engine_snippets
                                                inference_engine_transformations pugixml)
 
 target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)
diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt
index b03c329ca44cf3..ca65d596e60868 100644
--- a/inference-engine/src/legacy_api/CMakeLists.txt
+++ b/inference-engine/src/legacy_api/CMakeLists.txt
@@ -40,7 +40,6 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
     ${PUBLIC_HEADERS_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}/src
     ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl
-    $<TARGET_PROPERTY:inference_engine_snippets,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
@@ -61,7 +60,7 @@ add_library(${TARGET_NAME} SHARED
 ie_add_vs_version_file(NAME ${TARGET_NAME}
                        FILEDESCRIPTION "Inference Engine Legacy library")
 
-target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets
+target_link_libraries(${TARGET_NAME} PUBLIC inference_engine
                                      PRIVATE pugixml openvino::itt
                                              ${NGRAPH_LIBRARIES} inference_engine_transformations)
 
diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
index 0b66531044a62b..6c76ac47e0222a 100644
--- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
+++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
@@ -39,7 +39,6 @@
 #include "legacy/ngraph_ops/rnn_sequence_ie.hpp"
 #include "legacy/ngraph_ops/lstm_sequence_ie.hpp"
 #include "legacy/ngraph_ops/gru_sequence_ie.hpp"
-#include "snippets/op/subgraph.hpp"
 #include "exec_graph_info.hpp"
 
 #include "caseless.hpp"
@@ -1979,15 +1978,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
             cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames;
         }
 
-        if (auto subgraph = ::ngraph::as_type_ptr<ngraph::snippets::op::Subgraph>(layer)) {
-            std::string names = "";
-            for (const auto& op : subgraph->get_body()->get_ordered_ops()) {
-                names += ", " + op->get_friendly_name();
-            }
-
-            cnnLayer->params["originalLayersNames"] += names;
-        }
-
         std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer);
         if (!primitivesPriority.empty()) {
             cnnLayer->params["PrimitivesPriority"] = primitivesPriority;
diff --git a/inference-engine/src/snippets/CMakeLists.txt b/inference-engine/src/snippets/CMakeLists.txt
index 482f7e52bec362..45e4b9bb352fa2 100644
--- a/inference-engine/src/snippets/CMakeLists.txt
+++ b/inference-engine/src/snippets/CMakeLists.txt
@@ -52,7 +52,8 @@ ie_developer_export_targets(${TARGET_NAME})
 
 # install
 
-install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
-        LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
+# TODO: uncomment once snippets are integrated into CPU plugin
+# install(TARGETS ${TARGET_NAME}
+#         RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+#         ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
+#         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt
index 58b4c598fd83a4..18d7724add571a 100644
--- a/inference-engine/tests_deprecated/unit/CMakeLists.txt
+++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt
@@ -114,7 +114,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE
     # dynamic libraries
     inference_engine_transformations
     inference_engine_lp_transformations
-    inference_engine_snippets
     )
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")

From e41e25533d99d8126fb34d2b87218c463f62db9d Mon Sep 17 00:00:00 2001
From: Vladimir Zinoviev <vladimir.zinoviev@intel.com>
Date: Tue, 18 May 2021 00:59:01 +0300
Subject: [PATCH 05/16] [LPT] ConvolutionBackpropData support (#5313)

* [LPT] ConvolutionBackpropData support

* minor fixes

* [Transformations] Legacy subtract precision keep

* [LPT] ConvolutionBackpropData tests improvements

* [LPT] ConvolutionBackpropData weights folding when can't be transformed

* [LPT] CanBeTransformed unification and convolution weights folding

* [LPT] GPU INT8 optimizations condition flag

* [LPT] Concat precision predict improvement

* [LPT] Turn off asymmetric quantization for Deconvolution on GPU

* [LPT] Improvements from review

* [LPT] Check if layer after concat isQuantized and require per-tensor quantize

* [LPT] Improvement for Deconv->FQ pattern

* [LPT] Commented failing tests
---
 .../src/cldnn_engine/cldnn_engine.cpp         |   4 +
 .../convolution_backprop_data.hpp             |  25 ++
 .../low_precision/layer_transformation.hpp    |  43 +--
 .../include/low_precision/network_helper.hpp  |  15 +-
 .../include/low_precision/transformer.hpp     |   4 -
 .../weightable_layer_transformation.hpp       |   2 +-
 .../low_precision_transformations/src/add.cpp |   1 +
 .../src/concat.cpp                            |  27 +-
 .../src/concat_multi_channels.cpp             |   4 +-
 .../src/convolution.cpp                       |  22 +-
 .../src/convolution_backprop_data.cpp         | 218 ++++++++++++
 .../src/fake_quantize.cpp                     |   6 +-
 .../src/fuse_multiply_to_fake_quantize.cpp    |  11 +-
 .../src/fuse_subtract_to_fake_quantize.cpp    |  14 +-
 .../src/layer_transformation.cpp              |   1 +
 .../src/network_helper.cpp                    | 102 ++++--
 .../src/transformer.cpp                       |  22 +-
 .../src/weightable_layer_transformation.cpp   |  30 +-
 .../src/mkldnn_plugin/mkldnn_plugin.cpp       |   4 +-
 .../include/ngraph_ops/deconvolution_ie.hpp   |   3 +
 .../src/ngraph_ops/deconvolution_ie.cpp       |  40 ++-
 .../op_conversions/convert_convolutions.cpp   |   2 +
 .../op_conversions/convert_subtract.cpp       |   7 +-
 ...nvolution_backprop_data_transformation.cpp | 334 ++++++++++++++++++
 .../convolution_qdq_transformation.cpp        |  30 +-
 .../convolution_transformation.cpp            |  22 +-
 .../group_convolution_transformation.cpp      |  12 +-
 .../convert_deconvolution_test.cpp            |   2 +-
 ...nvolution_backprop_data_transformation.cpp | 100 ++++++
 ...nvolution_backprop_data_transformation.cpp | 103 ++++++
 ...nvolution_backprop_data_transformation.hpp |  65 ++++
 ...nvolution_backprop_data_transformation.cpp |  77 ++++
 .../convolution_backprop_data_function.hpp    |  54 +++
 .../convolution_backprop_data_function.cpp    | 149 ++++++++
 ngraph/core/include/ngraph/op/convolution.hpp |   4 +-
 ngraph/core/include/ngraph/op/group_conv.hpp  |   4 +-
 ngraph/core/src/op/convolution.cpp            |   4 +-
 ngraph/core/src/op/group_conv.cpp             |   2 +-
 38 files changed, 1401 insertions(+), 168 deletions(-)
 create mode 100644 inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
 create mode 100644 inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp
 create mode 100644 inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp
 create mode 100644 inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp
 create mode 100644 inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp

diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 0bea81efacea19..4aa53beb1e5a86 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -70,6 +70,7 @@
 #include <low_precision/pull_reshape_through_dequantization.hpp>
 #include <low_precision/pull_transpose_through_dequantization.hpp>
 #include <low_precision/transformer.hpp>
+#include <low_precision/convolution_backprop_data.hpp>
 #include <low_precision/mat_mul.hpp>
 #include <low_precision/strided_slice.hpp>
 #include <low_precision/network_helper.hpp>
@@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
                 .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
                     .setSupportAsymmetricQuantization(false)
                     .setSupport3DTensorOnActivations(false))
+                .add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
+                    .setSupportAsymmetricQuantization(false)
+                    .setDeconvolutionSpecificChannelsRatio(true))
                 // INT8 StridedSlice not supported
                 .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
 
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
new file mode 100644
index 00000000000000..d6bbe504dc6eea
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/ngraph.hpp>
+#include "weightable_layer_transformation.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
+public:
+    ConvolutionBackpropDataTransformation(const Params& params);
+    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
+    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
+};
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
index 36b1293cd425b3..06a37ab8b22015 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
 public:
     DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
 
+    explicit DataPrecision(const element::Type& precision) {
+        this->precision = precision;
+        min = getMinValue(precision, 256);
+        max = getMaxValue(precision, 256);
+        hasZeroPoint = false;
+    }
+
     DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
             precision(precision),
             min(min),
@@ -122,29 +129,6 @@ class TRANSFORMATIONS_API DataPrecision {
     static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
         return signedInterval ? element::i8 : element::u8;
     }
-
-    static float getMin(const size_t quantizationLevels, const bool signedInterval) {
-        if (quantizationLevels == 255) {
-            return signedInterval  ? -127.0f : 0.0f;
-        } else if (quantizationLevels == 256) {
-            return signedInterval ? -128.0f : 0.0f;
-        } else {
-            // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
-            // FIXME: not completed
-            return signedInterval ? -128.0f : 0.0f;
-        }
-    }
-
-    static float getMax(const size_t quantizationLevels, const bool signedInterval) {
-        if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
-            return signedInterval ? 127.0f : 255.0f;
-        } else {
-            // THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
-            // FIXME: not completed
-            // return quantizationLevels - 1.0;
-            return signedInterval ? 127.0f : 255.0f;
-        }
-    }
 };
 
 inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@@ -181,7 +165,8 @@ class TRANSFORMATIONS_API LayerTransformation {
                 std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
                 std::vector<element::Type> precisionsOnWeights = { element::i8 },
                 element::Type deqPrecision = element::f32,
-                bool support3DTensorOnActivations = true) :
+                bool support3DTensorOnActivations = true,
+                bool deconvolutionSpecificChannelsRatio = false) :
                 updatePrecisions(updatePrecisions),
                 quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
                 quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
@@ -189,7 +174,8 @@ class TRANSFORMATIONS_API LayerTransformation {
                 precisionsOnActivations(precisionsOnActivations),
                 precisionsOnWeights(precisionsOnWeights),
                 deqPrecision(deqPrecision),
-                support3DTensorOnActivations(support3DTensorOnActivations) {
+                support3DTensorOnActivations(support3DTensorOnActivations),
+                deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
             if (precisionsOnActivations.size() == 0ul) {
                 THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
             }
@@ -234,6 +220,11 @@ class TRANSFORMATIONS_API LayerTransformation {
             return *this;
         }
 
+        Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
+            this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
+            return *this;
+        }
+
         bool updatePrecisions;
         QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
         QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@@ -242,6 +233,7 @@ class TRANSFORMATIONS_API LayerTransformation {
         std::vector<element::Type> precisionsOnWeights;
         element::Type deqPrecision;
         bool support3DTensorOnActivations;
+        bool deconvolutionSpecificChannelsRatio;
     };
 
     class PrecisionDetails {
@@ -318,6 +310,7 @@ class TRANSFORMATIONS_API LayerTransformation {
     std::vector<element::Type> precisionsOnWeights;
     element::Type deqPrecision;
     bool support3DTensorOnActivations;
+    bool deconvolutionSpecificChannelsRatio;
 
     // absolute value, used to determine quantization interval asymmetry
     float quantizationIntervalAsymmetryThreshold;
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
index 9846ef50d6aa2d..8cf52a13fe20ca 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
@@ -109,7 +109,8 @@ class TRANSFORMATIONS_API NetworkHelper {
         const float max,
         const bool hasZeroPoint,
         const bool updatePrecision,
-        const element::Type deqPrecision = element::f32);
+        const element::Type deqPrecision = element::f32,
+        const size_t outChannelsShapeIndex = 0);
 
     static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
         std::shared_ptr<opset1::FakeQuantize> fq,
@@ -183,7 +184,7 @@ class TRANSFORMATIONS_API NetworkHelper {
     static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
 
     static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
-    static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues);
+    static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
 
     static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
 
@@ -191,8 +192,16 @@ class TRANSFORMATIONS_API NetworkHelper {
 
     static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
 
+    static std::vector<element::Type> precisionIntersection(
+            const std::vector<element::Type>& v1,
+            const std::vector<element::Type>& v2) noexcept;
+
 private:
-    static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet);
+    static std::shared_ptr<Node> foldFakeQuantize(
+            const std::shared_ptr<opset1::FakeQuantize>& fq,
+            const bool roundValues,
+            const bool roundValuesWasSet,
+            int outChannelsShapeIndex = 0);
 
     // 1  - on weights
     // 0  - weightable layer was not found
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
index 7a10d1daeb1b74..8de3fba36d5906 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
@@ -303,10 +303,6 @@ class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILaye
         std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
         GraphRewrite& pass,
         TransformationContext& context);
-
-    std::vector<element::Type> precisionIntersection(
-        const std::vector<element::Type>& v1,
-        const std::vector<element::Type>& v2) const noexcept;
 };
 
 class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
index 94b81f2b2af785..aeb0a6d9abd576 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
@@ -22,7 +22,7 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma
     bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 
 protected:
-    void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const;
+    void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
     static bool isGroup(const std::shared_ptr<Node>& node);
     static bool isDepthwise(const std::shared_ptr<Node>& node);
 
diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp
index 85aef194893107..915e87d2f60803 100644
--- a/inference-engine/src/low_precision_transformations/src/add.cpp
+++ b/inference-engine/src/low_precision_transformations/src/add.cpp
@@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
     const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
     if (is_type<opset1::Convolution>(parent) ||
         is_type<opset1::GroupConvolution>(parent) ||
+        is_type<opset1::ConvolutionBackpropData>(parent) ||
         (is_type<opset1::MatMul>(parent) &&
         (is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
         return nullptr;
diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp
index 24cc5940c1bb1f..4988e29b1e289a 100644
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
         return false;
     }
 
-    DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-    if (dataPrecision.precision == ngraph::element::undefined) {
+    std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
+    fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
+    if (concatParentsChildrensPrecisions.empty()) {
         return false;
     }
 
-    std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
     for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
+        fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
         if (fq == nullptr) {
             return false;
         }
@@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
         if (quantizationDetails.inputHighValues.size() != 1ul) {
             return false;
         }
+        std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
+        fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
+        concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
 
-        const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
-        if (dataPrecision2.precision == ngraph::element::undefined) {
+        if (concatParentsChildrensPrecisions.empty()) {
             return false;
         }
-
-        if (dataPrecision.precision != dataPrecision2.precision) {
-            // quantization levels are the same, difference can be in sign
-            // wider interval (precision) is preferable: use signed if least one interval is signed
-            dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
-        }
     }
 
-    if (dataPrecision.precision == ngraph::element::undefined) {
-        return false;
+    DataPrecision dataPrecision;
+    if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
+        dataPrecision = DataPrecision(element::i8);
+    } else {
+        dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
     }
 
     std::vector<QuantizationDetails> quantizationLayersDetails;
diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
index 62d958d22b4037..dc81d51cd717de 100644
--- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
@@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
     for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
         const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
         for (const std::shared_ptr<ngraph::Node>& child : children) {
-            if (is_type<ngraph::opset1::Convolution>(child.get())) {
+            if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
+                is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
+                this->layerTransformationsManager->isQuantized(child)) {
                 return false;
             }
         }
diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp
index ff5ca944df5796..6496ee4ee54eab 100644
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
     auto convolution = m.get_match_root();
 
     if (!canConvolutionBeTransformed(context, convolution)) {
-        return false;
+        auto weightInput = convolution->get_input_node_shared_ptr(1);
+        std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
+        FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
+                                                    NetworkHelper::getDequantization(convolution, 1ul) :
+                                                    NetworkHelper::getDequantization(reshapeFromWeights);
+        if (dequantization.empty()) {
+            const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
+            std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
+            if (reshapeFromWeights != nullptr) {
+                resultConstant = fold_reshape<opset1::Reshape>(
+                        resultConstant,
+                        reshapeFromWeights->input_value(1),
+                        false);
+            }
+            if (as_type_ptr<opset1::Constant>(resultConstant)) {
+                replace_node(weightInput, resultConstant);
+            }
+        } else {
+            NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
+        }
+        return true;
     }
 
     convolution = NetworkHelper::separateInStandaloneBranch(convolution);
diff --git a/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp
new file mode 100644
index 00000000000000..a73ee1de155781
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp
@@ -0,0 +1,218 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/convolution_backprop_data.hpp"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cassert>
+
+#include "low_precision/network_helper.hpp"
+#include "low_precision/common/dequantization_op.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
+}
+
+void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
+    addPattern(
+            pass,
+            context,
+            make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
+    addPattern(
+            pass,
+            context,
+            make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
+    addPattern(
+            pass,
+            context,
+            make_op_pattern<opset1::ConvolutionBackpropData>(
+                    { make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
+    addPattern(
+            pass,
+            context,
+            make_op_pattern<opset1::ConvolutionBackpropData>(
+                    { make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
+}
+
+bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
+    if (deconvolutionSpecificChannelsRatio) {
+        size_t inputChannels = layer->get_input_shape(0)[1];
+        size_t outputChannels = layer->get_output_shape(0)[1];
+        if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
+            return false;
+        }
+    }
+    return WeightableLayerTransformation::isQuantized(layer, false);
+}
+
+bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
+    auto convolutionBackpropData = m.get_match_root();
+
+    if (!canBeTransformed(context, convolutionBackpropData)) {
+        auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
+        std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
+        FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
+                         NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
+                         NetworkHelper::getDequantization(reshapeFromWeights);
+        if (dequantization.empty()) {
+            const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
+            std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
+            if (reshapeFromWeights != nullptr) {
+                resultConstant = fold_reshape<opset1::Reshape>(
+                        resultConstant,
+                        reshapeFromWeights->input_value(1),
+                        false);
+            }
+            if (as_type_ptr<opset1::Constant>(resultConstant)) {
+                replace_node(weightsInput, resultConstant);
+            }
+        } else {
+            NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
+        }
+        return true;
+    }
+
+    convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
+    FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
+    {
+        if (dequantization.subtract != nullptr) {
+            std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
+            ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
+
+            NetworkHelper::optimizeSubtract(dequantization.subtract);
+        }
+        std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
+        std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
+                reducedConstant->get_output_element_type(0),
+                Shape{ 1 },
+                reducedConstant->cast_vector<float>()[0]);
+        auto inputs = convolutionBackpropData->input_values();
+        inputs[0] = dequantization.multiply->input_value(0);
+        const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
+
+        const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
+            *as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
+            std::vector<element::Type>{deqPrecision, deqPrecision},
+            std::vector<element::Type>{deqPrecision});
+
+        const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
+            std::vector<element::Type>{ deqPrecision, deqPrecision },
+            std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
+            ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
+            ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
+
+        replace_node(convolutionBackpropData, newMultiplyAfter);
+        convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
+        inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
+        if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
+            auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
+            replace_node(convolutionBackpropData, newConvolution);
+            convolutionBackpropData = newConvolution;
+        }
+    }
+
+    {
+        decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
+
+        dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
+
+        if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
+            const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
+            std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
+            NetworkHelper::copyInfo(fq, newFQ);
+            replace_node(fq, newFQ);
+        }
+
+        std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
+                convolutionBackpropData->input_value(1).get_node_shared_ptr());
+        std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
+
+        {
+            Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
+            auto inputs = convolutionBackpropData->input_values();
+            inputs[1] = multiplyFromWeights->input_value(0);
+            auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
+                convolutionBackpropData->copy_with_new_inputs(inputs),
+                foldConvert(
+                    fold_reshape<opset1::Reshape>(
+                        multiplyFromWeights->input_value(1),
+                        std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
+                        false),
+                    convolutionBackpropData->get_output_element_type(0)));
+            replace_node(convolutionBackpropData, newMultiplyAfter);
+            convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
+        }
+
+        if (subtractFromWeights != nullptr) {
+            // optimize zero point on weights
+            auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
+            if (optimizedSubtract == nullptr) {
+                subtractFromWeights = nullptr;
+            } else {
+                subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
+
+                const Shape weightsShape = subtractFromWeights->input(0).get_shape();
+                Shape zeroPointShape(weightsShape.size(), 1ul);
+                zeroPointShape[1] = weightsShape[1];
+
+                auto zeroPointConstant = fold<opset1::Broadcast>(
+                        subtractFromWeights->get_input_node_shared_ptr(1),
+                        std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
+                replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
+            }
+        }
+
+        std::shared_ptr<opset1::Convert> convertFromWeights =
+                as_type_ptr<opset1::Convert>(
+                    subtractFromWeights == nullptr ?
+                        multiplyFromWeights->get_input_node_shared_ptr(0) :
+                        subtractFromWeights->get_input_node_shared_ptr(0));
+        if (convertFromWeights != nullptr) {
+            auto inputs = convolutionBackpropData->input_values();
+            inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
+            // remove Convert on weights
+            auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
+            replace_node(convolutionBackpropData, newConvolution);
+            convolutionBackpropData = newConvolution;
+        }
+    }
+    std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
+            convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
+    ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
+    updateOutput(context, finalDequantization, convolutionBackpropData);
+
+    auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
+    if (is_type<opset1::Reshape>(onWeights)) {
+        onWeights = onWeights->get_input_node_shared_ptr(0);
+    }
+
+    if (is_type<opset1::Subtract>(onWeights)) {
+        auto& rt = onWeights->get_rt_info();
+        rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
+    }
+
+    return true;
+}
+
+bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
+    if (deconvolutionSpecificChannelsRatio) {
+        size_t inputChannels = op->get_input_shape(0)[1];
+        size_t outputChannels = op->get_output_shape(0)[1];
+        if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
+            return false;
+        }
+    }
+
+    return canConvolutionBeTransformed(context, op);
+}
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
index 41b9851d5e3ff9..53fe2702984909 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
@@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
 
 bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
     std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
-    if (!NetworkHelper::isQuantizeSupported(layer)) {
+    if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
         return false;
     }
 
@@ -149,7 +149,9 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
         inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
     } else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
         if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
-            is_type<opset1::GroupConvolution>(fq::getData(eltwise))) {
+            is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
+            is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
+            is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
             return nullptr;
         }
 
diff --git a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
index c1b7f4e907b6a8..734d9abec435ec 100644
--- a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp
@@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
     const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
     const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
 
+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
     auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
         opset1::FakeQuantize(
             fakeQuantizeParent->output(parentIndex),
-            foldConvert(fakeQuantize->input_value(1), deqPrecision),
-            foldConvert(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
             outputLowConst_f32,
             outputHighConst_f32,
             fakeQuantize->get_levels()),
diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
index 2e3f2e23d3f428..8d8d9968802e44 100644
--- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp
@@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
     const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
     const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
 
+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
     auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
         opset1::FakeQuantize(
             fakeQuantizeParent->output(parentIndex),
-            foldConvert(fakeQuantize->input_value(1), deqPrecision),
-            foldConvert(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
             outputLowConst_f32,
             outputHighConst_f32,
             fakeQuantize->get_levels()),
@@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
     for (const auto& target : children) {
         const auto convolution = is_type<opset1::Convolution>(target.get_node());
         const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
-        if (convolution || groupConvolution) {
+        const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
+        if (convolution || groupConvolution || convolutionBackpropData) {
             return false;
         }
     }
diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
index 834aa6931c5a61..0fc0a9dc4fc52d 100644
--- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
@@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
     precisionsOnWeights(params.precisionsOnWeights),
     deqPrecision(params.deqPrecision),
     support3DTensorOnActivations(params.support3DTensorOnActivations),
+    deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
     quantizationIntervalAsymmetryThreshold(0.002f),
     zeroThreshold(1.e-6f),
     minQuantizationLevels(2ul),
diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
index dbca7606e7322a..4a1e942e5753ba 100644
--- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp
+++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
@@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
         return is_type<opset1::Parameter>(node) ||
             is_type<opset1::Convolution>(node) ||
             is_type<opset1::GroupConvolution>(node) ||
-            is_type<opset1::MatMul>(node);
+            is_type<opset1::MatMul>(node) ||
+            is_type<opset1::ConvolutionBackpropData>(node);
     };
 
     if (isNotConstantPathOperation(op)) {
@@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
     return foldFakeQuantize(fq, false, false);
 }
 
-std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) {
-    return foldFakeQuantize(fq, roundValues, true);
+std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
+        const std::shared_ptr<opset1::FakeQuantize>& fq,
+        const bool roundValues,
+        const int outChannelsShapeIndex) {
+    return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
 }
 
 FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {
@@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
 std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
     const std::shared_ptr<opset1::FakeQuantize>& fq,
     const bool roundValuesArg,
-    const bool roundValuesWasSet) {
+    const bool roundValuesWasSet,
+    const int outChannelsShapeIndex) {
     if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
         is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
         is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&
@@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
         if (constShape.empty() || constShape.size() > 5lu) {
             THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
         }
+        if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
+            THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
+        }
 
-        // OIDHW
-        const size_t OC = constShape[0];
-        const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
+        size_t OC;
+        size_t IC;
+        // OIDHW or IODHW
+        if (constShape.size() == 1) {
+            OC = constShape[0];
+            IC = 1;
+        } else {
+            OC = constShape[outChannelsShapeIndex];
+            IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
+        }
         const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
         const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
         const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
@@ -667,29 +682,35 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
 
         auto levels_1 = fq->get_levels() - 1.f;
 
-        //const size_t DHW = D * H * W;
+        const size_t DHW = D * H * W;
         const size_t IDHW = IC * D * H * W;
 
         const auto values = constant->cast_vector<float>();
         std::vector<float> quantizedValues(OC * IC * D * H * W);
 
         for (size_t oc = 0; oc < OC; ++oc) {
-            for (size_t iidx = 0; iidx < IDHW; ++iidx) {
-                const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
-                const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
-                const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
-                const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
-
-                const size_t idx = oc * IDHW + iidx;
-
-                if (values[idx] <= inputLow) {
-                    quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
-                } else if (values[idx] > inputHigh) {
-                    quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
-                } else {
-                    const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
-                        levels_1 * (outputHigh - outputLow) + outputLow;
-                    quantizedValues[idx] = roundValues ? std::roundf(value) : value;
+            const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
+            const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
+            const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
+            const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
+            for (size_t ic = 0; ic < IC; ++ic) {
+                for (size_t iidx = 0; iidx < DHW; ++iidx) {
+                    size_t idx;
+                    if (outChannelsShapeIndex == 0) {
+                        idx = oc * IDHW + ic * DHW + iidx;
+                    } else {
+                        idx = ic * IDHW + oc * DHW + iidx;
+                    }
+
+                    if (values[idx] <= inputLow) {
+                        quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
+                    } else if (values[idx] > inputHigh) {
+                        quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
+                    } else {
+                        const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
+                            levels_1 * (outputHigh - outputLow) + outputLow;
+                        quantizedValues[idx] = roundValues ? std::roundf(value) : value;
+                    }
                 }
             }
         }
@@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
     const float max,
     const bool hasZeroPoint,
     const bool updatePrecision,
-    const element::Type deqPrecision) {
+    const element::Type deqPrecision,
+    const size_t outChannelsShapeIndex) {
     using std::make_shared;
 
     const auto outputLow = fq->input_value(3);
@@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
             newMax->output(0),
             fq->get_levels(),
             fq->get_auto_broadcast()),
-        true);
+        true,
+        outChannelsShapeIndex);
     NetworkHelper::copyInfo(fq, newFQ);
 
     std::shared_ptr<ngraph::Node> convert2;
@@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
     if (is_type<opset1::Subtract>(node)) {
         const auto parent = node->get_input_node_shared_ptr(0);
         const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
-        const auto intType = intNode->get_input_element_type(0);
-        if (intType == element::u8 || intType == element::i8) {
-            min = DataPrecision::getMinValue(intType, 256) - 0.5f;
-            max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
+        const auto type = intNode->get_input_element_type(0);
+        if (type == element::u8 || type == element::i8) {
+            min = DataPrecision::getMinValue(type, 256) - 0.5f;
+            max = DataPrecision::getMaxValue(type, 256) + 0.5f;
         } else {
-            return false;
+            return type == element::f32 || type == element::f16;
         }
         auto subtract1input = node->get_input_node_shared_ptr(1);
         if (is_type<opset1::Convert>(subtract1input)) {
@@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
     return true;
 }
 
+std::vector<element::Type> NetworkHelper::precisionIntersection(
+        const std::vector<element::Type>& v1,
+        const std::vector<element::Type>& v2) noexcept {
+    std::vector<element::Type> v3;
+
+    auto v1Copy = v1;
+    auto v2Copy = v2;
+
+    std::sort(v1Copy.begin(), v1Copy.end());
+    std::sort(v2Copy.begin(), v2Copy.end());
+
+    std::set_intersection(v1Copy.begin(), v1Copy.end(),
+                          v2Copy.begin(), v2Copy.end(),
+                          std::back_inserter(v3));
+    return v3;
+}
+
 }  // namespace low_precision
 }  // namespace pass
 }  // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp
index d8b484bcbcebc1..4debb5868b6d96 100644
--- a/inference-engine/src/low_precision_transformations/src/transformer.cpp
+++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp
@@ -34,6 +34,7 @@
 #include "low_precision/avg_pool.hpp"
 #include "low_precision/clamp.hpp"
 #include "low_precision/convolution.hpp"
+#include "low_precision/convolution_backprop_data.hpp"
 #include "low_precision/depth_to_space.hpp"
 #include "low_precision/fake_quantize.hpp"
 #include "low_precision/group_convolution.hpp"
@@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
         add<AvgPoolTransformation, opset1::AvgPool>(params).
         add<ClampTransformation, opset1::Clamp>(params).
         add<ConvolutionTransformation, opset1::Convolution>(params).
+        add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
         add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
         add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
         add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).
@@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
     make_matcher_type_relaxed<opset1::Clamp>(this);
     make_matcher_type_relaxed<opset1::Concat>(this);
     make_matcher_type_relaxed<opset1::Convolution>(this);
+    make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
     make_matcher_type_relaxed<opset1::DepthToSpace>(this);
     make_matcher_type_relaxed<opset1::FakeQuantize>(this);
     make_matcher_type_relaxed<opset1::GroupConvolution>(this);
@@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
     network->validate_nodes_and_infer_types();
 }
 
-std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
-    const std::vector<element::Type>& v1,
-    const std::vector<element::Type>& v2) const noexcept {
-    std::vector<element::Type> v3;
-
-    auto v1Copy = v1;
-    auto v2Copy = v2;
-
-    std::sort(v1Copy.begin(), v1Copy.end());
-    std::sort(v2Copy.begin(), v2Copy.end());
-
-    std::set_intersection(v1Copy.begin(), v1Copy.end(),
-        v2Copy.begin(), v2Copy.end(),
-        std::back_inserter(v3));
-    return v3;
-}
-
 std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
     const std::string operantionType = LowPrecisionTransformations::getType(op);
     const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);
@@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
     std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();
 
     for (const auto& transform : transformation) {
-        precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations());
+        precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
     }
     return precisions;
 }
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index b3651cdf231b09..726fc893975594 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -26,7 +26,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
         return false;
     }
 
-    if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) {
+    if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
         return false;
     }
 
@@ -46,24 +46,10 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
             return false;
         }
         if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
-            const std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
-            if (as_type_ptr<opset1::Constant>(resultConstant)) {
-                replace_node(fqOnWeights, resultConstant);
-            }
             return false;
         }
     } else {
         if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
-            const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
-            if (resultDequantization.empty() && reshapeFromWeights) {
-                const auto foldedReshape = fold<opset1::Reshape>(
-                        reshapeFromWeights->get_input_node_shared_ptr(0),
-                        reshapeFromWeights->get_input_node_shared_ptr(1),
-                        reshapeFromWeights->get_special_zero());
-                if (is_type<opset1::Constant>(foldedReshape)) {
-                    replace_node(reshapeFromWeights, foldedReshape);
-                }
-            }
             return false;
         }
     }
@@ -170,9 +156,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
             return false;
         }
 
-        if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones
-            (shape_size(constOutputShape) != constOutputShape[0]) ||
-            ((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) {
+        const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
+        if ( // Check if all dimensions of scale except the output channels are all ones
+            (shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
+            ((constOutputShape[outChannelsShapeIndex] != 1ul) &&
+                (fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
             return false;
         }
     } else {
@@ -256,7 +244,7 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr<Node> l
     return false;
 }
 
-void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> node) const {
+void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node, const size_t outChannelsShapeIndex) const {
     const auto fq = getFakeQuantizeOnWeights(node);
     if (fq == nullptr) {
         return;
@@ -270,7 +258,9 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::sha
         dataPrecision.min,
         dataPrecision.max,
         dataPrecision.hasZeroPoint,
-        updatePrecisions);
+        updatePrecisions,
+        element::f32,
+        outChannelsShapeIndex);
 
     std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
     if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 32efc8d09ac43a..3ab7622ac91d24 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -76,6 +76,7 @@
 #include <low_precision/transformer.hpp>
 #include <low_precision/convert_subtract_constant.hpp>
 #include <low_precision/convolution.hpp>
+#include <low_precision/convolution_backprop_data.hpp>
 #include <low_precision/group_convolution.hpp>
 #include <low_precision/multiply_to_group_convolution.hpp>
 #include <low_precision/network_helper.hpp>
@@ -328,7 +329,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
             .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
                 LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
             .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
-                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));
+                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))
+            .remove<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>());
 
         transformer.transform(nGraphFunc);
     }
diff --git a/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp
index 96422f61e3b6c8..3aa4a6492d123f 100644
--- a/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp
+++ b/inference-engine/src/transformations/include/ngraph_ops/deconvolution_ie.hpp
@@ -29,6 +29,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op {
                     const Strides& dilations,
                     const CoordinateDiff& pads_begin,
                     const CoordinateDiff& pads_end,
+                    const element::Type output_type,
                     const size_t& group = 1,
                     const PadType& auto_pad = PadType::EXPLICIT,
                     const CoordinateDiff& output_padding = {},
@@ -41,6 +42,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op {
                     const Strides& dilations,
                     const CoordinateDiff& pads_begin,
                     const CoordinateDiff& pads_end,
+                    const element::Type output_type,
                     const size_t& group = 1,
                     const PadType& auto_pad = PadType::EXPLICIT,
                     const CoordinateDiff& output_padding = {},
@@ -79,6 +81,7 @@ class TRANSFORMATIONS_API DeconvolutionIE : public Op {
     size_t m_group;
     CoordinateDiff m_output_padding;
     std::shared_ptr<Node> m_output_shape;
+    element::Type m_output_type;
 };
 
 }  // namespace op
diff --git a/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp
index ef9bc90bd1ea8c..e8940700dbeece 100644
--- a/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp
+++ b/inference-engine/src/transformations/src/ngraph_ops/deconvolution_ie.cpp
@@ -13,6 +13,7 @@
 #include "ngraph/util.hpp"
 #include "ngraph/validation_util.hpp"
 #include "ngraph/opsets/opset1.hpp"
+#include "ngraph_ops/type_relaxed.hpp"
 
 using namespace std;
 using namespace ngraph;
@@ -25,6 +26,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
                                      const Strides& dilations,
                                      const CoordinateDiff& pads_begin,
                                      const CoordinateDiff& pads_end,
+                                     const element::Type output_type,
                                      const size_t& group,
                                      const PadType& auto_pad,
                                      const CoordinateDiff& output_padding,
@@ -37,7 +39,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
         , m_auto_pad(auto_pad)
         , m_group(group)
         , m_output_padding(output_padding)
-        , m_output_shape(output_shape) {
+        , m_output_shape(output_shape)
+        , m_output_type(output_type) {
     constructor_validate_and_infer_types();
 }
 
@@ -48,6 +51,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
                                      const Strides& dilations,
                                      const CoordinateDiff& pads_begin,
                                      const CoordinateDiff& pads_end,
+                                     const element::Type output_type,
                                      const size_t& group,
                                      const PadType& auto_pad,
                                      const CoordinateDiff& output_padding,
@@ -60,7 +64,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
         , m_auto_pad(auto_pad)
         , m_group(group)
         , m_output_padding(output_padding)
-        , m_output_shape(output_shape) {
+        , m_output_shape(output_shape)
+        , m_output_type(output_type) {
     constructor_validate_and_infer_types();
 }
 
@@ -81,13 +86,32 @@ void op::DeconvolutionIE::validate_and_infer_types() {
     }
     Output<Node> conv;
     if (m_output_shape) {
-        conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights, m_output_shape,
-                m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
+        conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
+            std::vector<element::Type>{ element::f32, element::f32 },
+            std::vector<element::Type>{ element::f32 },
+            ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
+            ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
+            m_output_shape,
+            m_strides,
+            m_pads_begin,
+            m_pads_end,
+            m_dilations,
+            m_auto_pad,
+            m_output_padding);
     } else {
-        conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights,
-                m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
+        conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
+            std::vector<element::Type>{ element::f32, element::f32 },
+            std::vector<element::Type>{ element::f32 },
+            ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
+            ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
+            m_strides,
+            m_pads_begin,
+            m_pads_end,
+            m_dilations,
+            m_auto_pad,
+            m_output_padding);
     }
-    set_output_type(0, conv.get_element_type(), conv.get_partial_shape());
+    set_output_type(0, m_output_type, conv.get_partial_shape());
 }
 
 shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
@@ -99,6 +123,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
                                             m_dilations,
                                             m_pads_begin,
                                             m_pads_end,
+                                            m_output_type,
                                             m_group,
                                             m_auto_pad,
                                             m_output_padding,
@@ -111,6 +136,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
                                             m_dilations,
                                             m_pads_begin,
                                             m_pads_end,
+                                            m_output_type,
                                             m_group,
                                             m_auto_pad,
                                             m_output_padding,
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp
index 5b7965762a59c5..1f0fb32ae6be46 100644
--- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_convolutions.cpp
@@ -113,6 +113,7 @@ ngraph::pass::ConvertDeconvolution::ConvertDeconvolution() {
                                                                        deconv->get_dilations(),
                                                                        deconv->get_pads_begin(),
                                                                        deconv->get_pads_end(),
+                                                                       deconv->get_output_element_type(0),
                                                                        1 /* groups */,
                                                                        deconv->get_auto_pad(),
                                                                        deconv->get_output_padding(),
@@ -158,6 +159,7 @@ ngraph::pass::ConvertGroupDeconvolution::ConvertGroupDeconvolution() {
                                                                      gconv->get_dilations(),
                                                                      gconv->get_pads_begin(),
                                                                      gconv->get_pads_end(),
+                                                                     gconv->get_output_element_type(0),
                                                                      group,
                                                                      gconv->get_auto_pad(),
                                                                      gconv->get_output_padding(),
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp
index 7080688b09c409..b5507ee205f4a7 100644
--- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_subtract.cpp
@@ -38,11 +38,14 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() {
                 const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this();
                 if (child != nullptr) {
                     if (is_type<opset1::Convolution>(child) ||
+                        is_type<opset1::ConvolutionBackpropData>(child) ||
                         is_type<opset1::GroupConvolution>(child) ||
+                        is_type<opset1::GroupConvolutionBackpropData>(child) ||
                         is_type<opset1::MatMul>(child) ||
-                            (is_type<opset1::Reshape>(child) &&
+                        (is_type<opset1::Reshape>(child) &&
                             (child->output(0).get_target_inputs().size() == 1ul) &&
-                            is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()))) {
+                            (is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) ||
+                             is_type<opset1::GroupConvolutionBackpropData>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) {
                         const auto input1Type = sub->input(0).get_element_type();
                         const auto input2Type = sub->input(1).get_element_type();
                         if (((input1Type == element::u8) && (input2Type == element::u8)) ||
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp
new file mode 100644
index 00000000000000..283adb5bf45a3d
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_backprop_data_transformation.cpp
@@ -0,0 +1,334 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "layer_transformation.hpp"
+
+#include <string>
+#include <sstream>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <low_precision/convolution_backprop_data.hpp>
+#include <low_precision/network_helper.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "simple_low_precision_transformer.hpp"
+#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
+
+using namespace testing;
+using namespace ngraph;
+using namespace ngraph::pass;
+
+class ConvolutionBackpropDataTransformationTestValues {
+public:
+    class Actual {
+    public:
+        ngraph::element::Type precisionBeforeDequantization;
+        ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
+        builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
+        builder::subgraph::DequantizationOperations dequantizationOnWeights;
+        std::shared_ptr<ngraph::opset1::Constant> weights;
+
+        Actual() = default;
+        Actual(
+            const ngraph::element::Type& precisionBeforeDequantization,
+            const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
+            const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
+            const std::shared_ptr<ngraph::opset1::Constant>& weights) :
+                precisionBeforeDequantization(precisionBeforeDequantization),
+                dequantizationOnActivations(dequantizationOnActivations),
+                fakeQuantizeOnWeights(fakeQuantizeOnWeights),
+                weights(weights) {}
+        Actual(
+            const  ngraph::element::Type& precisionBeforeDequantization,
+            const  ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
+            const  builder::subgraph::DequantizationOperations& dequantizationOnWeights,
+            const std::shared_ptr<ngraph::opset1::Constant>& weights) :
+            precisionBeforeDequantization(precisionBeforeDequantization),
+            dequantizationOnActivations(dequantizationOnActivations),
+            dequantizationOnWeights(dequantizationOnWeights),
+            weights(weights) {}
+    };
+
+    class Expected {
+    public:
+        ngraph::element::Type precisionBeforeDequantization;
+        ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
+        builder::subgraph::DequantizationOperations dequantizationOnWeights;
+        ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
+        std::shared_ptr<ngraph::opset1::Constant> weights;
+        bool transformed;
+    };
+
+    ngraph::pass::low_precision::LayerTransformation::Params params;
+    Actual actual;
+    Expected expected;
+};
+
+typedef std::tuple<
+        element::Type,
+        ngraph::Shape,
+        ConvolutionBackpropDataTransformationTestValues> ConvolutionBackpropDataTransformationParams;
+
+class ConvolutionBackpropDataTransformation : public LayerTransformation, public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams> {
+public:
+    void SetUp() override {
+        const auto netPrecision = std::get<0>(GetParam());
+        const auto inputShape = std::get<1>(GetParam());
+        auto outputShape = inputShape;
+        outputShape[1] /= 4;
+        outputShape[2] *= 2;
+        outputShape[3] *= 2;
+        auto testValues = std::get<2>(GetParam());
+
+        std::shared_ptr<Node> actualWeights = pass::low_precision::fold<opset1::Broadcast>(
+                testValues.actual.weights,
+                opset1::Constant::create(
+                        element::i64,
+                        Shape{inputShape.size()},
+                        Shape{inputShape[1], outputShape[1], 1, 1}));
+        if (!testValues.actual.fakeQuantizeOnWeights.empty()) {
+            actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+                    outputShape,
+                    netPrecision,
+                    testValues.actual.fakeQuantizeOnWeights,
+                    as_type_ptr<opset1::Constant>(actualWeights));
+        } else {
+            actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+                    outputShape,
+                    netPrecision,
+                    testValues.actual.dequantizationOnWeights,
+                    as_type_ptr<opset1::Constant>(actualWeights));
+        }
+
+        actualFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getOriginal(
+                testValues.actual.precisionBeforeDequantization,
+                netPrecision,
+                inputShape,
+                outputShape,
+                testValues.actual.dequantizationOnActivations,
+                actualWeights);
+
+        SimpleLowPrecisionTransformer transform;
+        transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params);
+        transform.transform(actualFunction);
+        std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>(
+                testValues.expected.weights,
+                opset1::Constant::create(
+                        element::i64,
+                        Shape{inputShape.size()},
+                        Shape{inputShape[1], outputShape[1], 1, 1}));
+
+        if (!testValues.expected.transformed) {
+            refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+                outputShape,
+                netPrecision,
+                testValues.actual.fakeQuantizeOnWeights,
+                as_type_ptr<opset1::Constant>(refWeights));
+        } else {
+            refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+                outputShape,
+                netPrecision,
+                testValues.expected.dequantizationOnWeights,
+                as_type_ptr<opset1::Constant>(refWeights));
+        }
+
+        referenceFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getReference(
+                testValues.expected.precisionBeforeDequantization,
+                netPrecision,
+                inputShape,
+                outputShape,
+                testValues.expected.dequantizationOnActivations,
+                refWeights,
+                testValues.expected.dequantizationAfter);
+    }
+
+    static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
+        const auto netPrecision = std::get<0>(obj.param);
+        auto inputShape = std::get<1>(obj.param);
+        ConvolutionBackpropDataTransformationTestValues testValues = std::get<2>(obj.param);
+
+        std::ostringstream result;
+        result << toString(testValues.params) << "_" <<
+               netPrecision << "_" <<
+               inputShape << "_" <<
+               testValues.actual.precisionBeforeDequantization << "_" <<
+               testValues.actual.dequantizationOnActivations << "_" <<
+               testValues.actual.dequantizationOnWeights << "_" <<
+               testValues.actual.fakeQuantizeOnWeights << "_" <<"_weights_" <<
+               testValues.actual.weights->get_element_type() << "_" << "{ " <<
+               testValues.actual.weights->cast_vector<float>()[0] << " }_";
+        return result.str();
+    }
+};
+
+TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
+    actualFunction->validate_nodes_and_infer_types();
+    auto res = compare_functions(referenceFunction, actualFunction, true, true, true);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+const std::vector<element::Type> netPrecisions = {
+        element::f32,
+        element::f16
+};
+
+const std::vector<ngraph::Shape> shapes = {
+        ngraph::Shape({ 1, 8, 16, 16 })
+};
+
+const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues = {
+    // with zero point
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
+            {},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
+            true
+        }
+    },
+    // updatePrecisions = false
+    {
+        LayerTransformation::createParamsU8I8().setUpdatePrecisions(false),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
+            {},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -125.f }),
+            true
+        }
+    },
+    // QDq version
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
+            {{ngraph::element::f32}, { 2.f }, { 0.01f }},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
+            {{}, { { 2.f }, ngraph::element::f32, {1, 2, 1, 1}, true, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" }  }, {}},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1 }}},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
+            true
+        }
+    },
+    // without zero point
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {}, { 0.02f }},
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {},
+            {},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
+            true
+        }
+    },
+    // QDq version
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {}, { 0.02f }},
+            {{ngraph::element::f32}, {}, { 0.01f }},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {},
+            {},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, {1}}},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
+            true
+        }
+    },
+    // per-channel dequantization with the same values
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f}  }},
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {},
+            {},
+            {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
+            true
+        }
+    },
+    // per-channel dequantization with different values
+    {
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
+            {},
+            {},
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            true
+        }
+    },
+};
+
+INSTANTIATE_TEST_CASE_P(
+    smoke_LPT,
+    ConvolutionBackpropDataTransformation,
+    ::testing::Combine(
+    ::testing::ValuesIn(netPrecisions),
+    ::testing::ValuesIn(shapes),
+    ::testing::ValuesIn(testValues)),
+    ConvolutionBackpropDataTransformation::getTestCaseName);
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp
index c52606641c402b..75b1d965e53416 100644
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp
@@ -231,7 +231,7 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
         }
     },
 
-    // Actual & Transformed:
+    // Actual:
     //
     // Parameter   Constant   Constant Constant
     //  |U8         |U8        |FP32    |I8
@@ -246,6 +246,22 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
     //         \FP32         /FP32
     //          \           /
     //           Convolution
+    //
+    // Transformed:
+    //
+    // Parameter   Constant
+    //  |U8         |U8
+    //  |           |
+    // Convert    Convert
+    //   \FP32    /FP32
+    //    \      /
+    //    Subtract  Constant
+    //      \FP32   /FP32
+    //       \     /
+    //       Multiply       Constant
+    //         \FP32         /FP32
+    //          \           /
+    //           Convolution
     {
         LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
         // ActualValues
@@ -262,8 +278,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
         {
             ngraph::element::u8,
             {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }},
-            {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }},
-            { std::vector<float>{ 2.f }, ngraph::element::f32},
+            {},
+            { std::vector<float>{ -3.75f }, ngraph::element::f32},
             {},
             ngraph::element::f32,
             {}
@@ -434,12 +450,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
                 { {1000.f}, element::f32, {}, false },
                 { {0.02f}, element::f32, {}, false }
             },
-            {
-                { ngraph::element::f32, false },
-                { {127.f}, element::f32, {}, false },
-                { {0.03f}, element::f32, {}, false }
-            },
-            { std::vector<float>{ 2.f }, ngraph::element::i8},
+            {},
+            { std::vector<float>{ -3.75f }, ngraph::element::f32},
             {},
             ngraph::element::f32,
             {}
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp
index 4ccbc8f412ad72..8c2d42dfbf3c98 100644
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp
@@ -160,8 +160,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
         {
             ngraph::element::u8,
             {{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             ngraph::element::f32,
             {}
         }
@@ -288,13 +288,13 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
                 {{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }},
                 {{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}}
             },
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             ngraph::element::f32,
             {}
         }
     },
-    // dequantization in second dimension
+    // float input
     {
         LayerTransformation::createParamsU8I8(),
         // ActualValues
@@ -316,8 +316,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
                 {{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }},
                 {{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}}
             },
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             ngraph::element::f32,
             {}
         }
@@ -356,8 +356,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
         {
             ngraph::element::f32,
             {{}, {}, { {0.02f}, element::f32 }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             ngraph::element::f32,
             {}
         }
@@ -396,8 +396,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
         {
             ngraph::element::u8,
             {{element::f32}, { 1000.f }, { {0.02f}, element::f32 }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             ngraph::element::f32,
             {}
         }
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp
index 7ba3252999e81f..d90999bb8ccad4 100644
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp
@@ -160,8 +160,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
         {
             ngraph::element::u8,
             {{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             {},
             ngraph::element::f32,
             {}
@@ -286,8 +286,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
         {
             ngraph::element::f32,
             {{}, {}, { 0.02f }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             {},
             ngraph::element::f32,
             {}
@@ -459,8 +459,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
         {
             ngraph::element::f32,
             {{}, {}, { 0.02f }},
-            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
-            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
+            {},
             {},
             ngraph::element::f32,
             {}
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp
index afe15697f0be77..51f60318f1824a 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_deconvolution_test.cpp
@@ -60,7 +60,7 @@ class ConvertDeconvolutionTest: public CommonTestUtils::TestsCommon,
         auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
         auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
         auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
-                ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
+                ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::element::f32);
 
         return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
     }
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
new file mode 100644
index 00000000000000..64ce304a24756f
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<ngraph::element::Type> netPrecisions = {
+    ngraph::element::f32
+};
+
+const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
+    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
+};
+
+const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
+    // FQ on weights
+    // with zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
+        "",
+        ""
+    },
+    // without zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
+        "",
+        ""
+    },
+    // with incorrect zero point on activations
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
+        "",
+        ""
+    },
+    // with incorrect zero point on weights
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+        "",
+        ""
+    },
+    // QDq on weights
+    // with zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    },
+    // without zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+        {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    },
+    // with incorrect zero point on activations
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+                ""
+    },
+    // with incorrect zero point on weights
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    }
+};
+
+const std::vector<ngraph::Shape> inputShapes = {
+    { 1, 8, 16, 16 }
+};
+
+const std::vector<ngraph::Shape> outputShapes = {
+    { 16, 16 }
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(inputShapes),
+        ::testing::ValuesIn(outputShapes),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(trasformationParamValues),
+        ::testing::ValuesIn(params)),
+    ConvolutionBackpropDataTransformation::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
new file mode 100644
index 00000000000000..d33e3c42f9e242
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp
@@ -0,0 +1,103 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+const std::vector<ngraph::element::Type> netPrecisions = {
+        ngraph::element::f32,
+        ngraph::element::f16
+};
+
+const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
+        LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
+        LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
+};
+
+const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
+    // FQ on weights
+    // with zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
+        "",
+        ""
+    },
+    // without zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
+        "",
+        ""
+    },
+    // TODO: check fails in CI
+//    // with incorrect zero point on activations
+//    {
+//        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+//        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
+//        "",
+//        ""
+//    },
+//    // with incorrect zero point on weights
+//    {
+//        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+//        {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+//        "",
+//        ""
+//    },
+    // QDq on weights
+    // with zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    },
+    // without zero point
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
+        {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    },
+    // with incorrect zero point on activations
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
+        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    },
+    // with incorrect zero point on weights
+    {
+        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
+        {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
+        "",
+        ""
+    }
+};
+
+const std::vector<ngraph::Shape> inputShapes = {
+        { 1, 8, 16, 16 },
+        { 1, 32, 16, 16 }
+};
+
+const std::vector<ngraph::Shape> outputShapes = {
+        { 16, 16 }
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
+    ::testing::Combine(
+            ::testing::ValuesIn(netPrecisions),
+            ::testing::ValuesIn(inputShapes),
+            ::testing::ValuesIn(outputShapes),
+            ::testing::Values(CommonTestUtils::DEVICE_GPU),
+            ::testing::ValuesIn(trasformationParamValues),
+            ::testing::ValuesIn(params)),
+    ConvolutionBackpropDataTransformation::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp
new file mode 100644
index 00000000000000..39d5ea583916e5
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp
@@ -0,0 +1,65 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+#include <utility>
+
+
+#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
+#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
+
+namespace LayerTestsDefinitions {
+
+class ConvolutionBackpropDataTransformationParam {
+public:
+    ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
+    ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights;
+    std::string layerName;
+    std::string expectedKernelType;
+
+    ConvolutionBackpropDataTransformationParam() = default;
+    ConvolutionBackpropDataTransformationParam(
+        const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
+        const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
+        std::string layerName,
+        std::string expectedKernelType) :
+        fakeQuantizeOnData(fakeQuantizeOnData), fakeQuantizeOnWeights(fakeQuantizeOnWeights),
+        layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
+    ConvolutionBackpropDataTransformationParam(
+        const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
+        ngraph::builder::subgraph::DequantizationOperations  dequantizationOnWeights,
+        std::string layerName,
+        std::string expectedKernelType) :
+        fakeQuantizeOnData(fakeQuantizeOnData), dequantizationOnWeights(std::move(dequantizationOnWeights)),
+        layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
+};
+
+typedef std::tuple<
+    ngraph::element::Type, // netPrecision
+    ngraph::Shape,         // inputShape
+    ngraph::Shape,         // outputShape
+    std::string,           // targetDevice
+    ngraph::pass::low_precision::LayerTransformation::Params,
+    ConvolutionBackpropDataTransformationParam
+> ConvolutionBackpropDataTransformationParams;
+
+class ConvolutionBackpropDataTransformation :
+    public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+    void Run() override;
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp
new file mode 100644
index 00000000000000..951af4fdd4e0e0
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
+
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
+    ngraph::element::Type netPrecision;
+    ngraph::Shape inputShape;
+    ngraph::Shape outputShape;
+    std::string targetDevice;
+    ngraph::pass::low_precision::LayerTransformation::Params params;
+    ConvolutionBackpropDataTransformationParam param;
+    std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = obj.param;
+
+    std::ostringstream result;
+    result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" <<
+        outputShape << "_" <<
+        param.fakeQuantizeOnData << "_" <<
+        param.fakeQuantizeOnWeights << "_" <<
+        param.dequantizationOnWeights;
+    return result.str();
+}
+
+void ConvolutionBackpropDataTransformation::SetUp() {
+    threshold = 0.1f;
+
+    ngraph::element::Type netPrecision;
+    ngraph::Shape inputShape;
+    ngraph::Shape outputShape;
+    ngraph::pass::low_precision::LayerTransformation::Params params;
+    ConvolutionBackpropDataTransformationParam param;
+    std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam();
+
+    std::shared_ptr<ngraph::Node> weights;
+
+    if (!param.fakeQuantizeOnWeights.empty()) {
+        weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+            ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
+            netPrecision,
+            param.fakeQuantizeOnWeights);
+    } else {
+        weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
+            ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
+            netPrecision,
+            param.dequantizationOnWeights);
+    }
+
+    function = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::get(
+        netPrecision,
+        inputShape,
+        outputShape,
+        param.fakeQuantizeOnData,
+        weights);
+}
+
+void ConvolutionBackpropDataTransformation::Run() {
+    LayerTestsCommon::Run();
+
+    const auto params = std::get<5>(GetParam());
+    const auto actualType = getRuntimePrecision(params.layerName);
+    EXPECT_EQ(actualType, params.expectedKernelType);
+}
+
+TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) {
+    Run();
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp
new file mode 100644
index 00000000000000..fa05d7b3cb18cd
--- /dev/null
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_backprop_data_function.hpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/ngraph.hpp>
+#include <ngraph/opsets/opset1.hpp>
+
+#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+
+class ConvolutionBackpropDataFunction {
+public:
+    static std::shared_ptr<Node> getWeights(
+        const Shape& shape,
+        const element::Type& netPrecision,
+        const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
+        const std::shared_ptr<opset1::Constant>& value = nullptr);
+    static std::shared_ptr<Node> getWeights(
+        const Shape& shape,
+        const element::Type& netPrecision,
+        const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
+        const std::shared_ptr<opset1::Constant>& value = nullptr);
+    static std::shared_ptr<Function> get(
+        const element::Type netPrecision,
+        const Shape& inputShape,
+        const Shape& outputShape,
+        const builder::subgraph::FakeQuantizeOnData& fqOnData,
+        const std::shared_ptr<Node>& weights);
+    static std::shared_ptr<Function> getOriginal(
+        const element::Type precision,
+        const element::Type netPrecision,
+        const Shape& inputShape,
+        const Shape& outputShape,
+        const builder::subgraph::DequantizationOperations& dequantization,
+        const std::shared_ptr<Node>& weights);
+    static std::shared_ptr<Function> getReference(
+        const element::Type precision,
+        const element::Type netPrecision,
+        const Shape& inputShape,
+        const Shape& outputShape,
+        const builder::subgraph::DequantizationOperations& dequantization,
+        const std::shared_ptr<Node>& weights,
+        const builder::subgraph::DequantizationOperations& dequantizationAfter);
+};
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp
new file mode 100644
index 00000000000000..ae7d3847f69866
--- /dev/null
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp
@@ -0,0 +1,149 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph_ops/type_relaxed.hpp>
+#include "ngraph_functions/subgraph_builders.hpp"
+#include "low_precision/network_helper.hpp"
+
+#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
+#include "lpt_ngraph_functions/common/builders.hpp"
+#include "low_precision/common/dequantization_op.hpp"
+#include "low_precision/network_helper.hpp"
+
+using namespace ngraph::pass::low_precision;
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+
+std::shared_ptr<Function> ConvolutionBackpropDataFunction::get(
+    const element::Type netPrecision,
+    const Shape& inputShape,
+    const Shape& outputShape,
+    const builder::subgraph::FakeQuantizeOnData& fqOnData,
+    const std::shared_ptr<Node>& weights) {
+    const auto input = std::make_shared<opset1::Parameter>(netPrecision, inputShape);
+    const auto fq = makeFakeQuantize(input, netPrecision, fqOnData);
+
+    auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
+        fq,
+        weights,
+        Strides{ 1, 1 },
+        CoordinateDiff{ 0, 0 },
+        CoordinateDiff{ 0, 0 },
+        Strides{ 1, 1 });
+
+    ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
+    return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
+}
+
+std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
+    const Shape& shape,
+    const element::Type& netPrecision,
+    const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
+    const std::shared_ptr<opset1::Constant>& value) {
+    const auto weights = value != nullptr ?
+            value :
+            std::make_shared<opset1::Constant>(
+            element::i8,
+            shape,
+            std::vector<float>(shape_size(shape), 1));
+    const auto convert = std::make_shared<opset1::Convert>(weights, netPrecision);
+    OutputVector convertedOutput(1);
+    convert->constant_fold(convertedOutput, convert->input_values());
+    const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
+    const auto fq = makeFakeQuantize(convertedWeights, netPrecision, fqOnWeights);
+
+    return fq;
+}
+
+std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
+    const Shape& shape,
+    const element::Type& netPrecision,
+    const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
+    const std::shared_ptr<opset1::Constant>& value) {
+    auto weights =
+        value != nullptr ?
+            value :
+            std::make_shared<opset1::Constant>(
+                element::i8,
+                shape,
+                std::vector<float>(shape_size(shape), 1));
+    auto dequantizationStructure = dequantizationOnWeights;
+    dequantizationStructure.setPrecision(netPrecision);
+    if (!dequantizationOnWeights.subtract.constantPrecision.is_real()) {
+        dequantizationStructure.subtract.constantPrecision = dequantizationOnWeights.subtract.constantPrecision;
+    }
+    if (weights->get_element_type().is_real()) {
+        weights = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(weights, netPrecision));
+    }
+    const auto dq = makeDequantization(weights, dequantizationStructure);
+
+    return dq;
+}
+
+std::shared_ptr<Function> ConvolutionBackpropDataFunction::getOriginal(
+    const element::Type precision,
+    const element::Type netPrecision,
+    const Shape& inputShape,
+    const Shape& outputShape,
+    const builder::subgraph::DequantizationOperations& dequantization,
+    const std::shared_ptr<Node>& weights) {
+    const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
+    auto dequantizationStructure = dequantization;
+    dequantizationStructure.multiply.outPrecision = netPrecision;
+    const auto activations = makeDequantization(input, dequantizationStructure);
+
+    auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
+            activations,
+            weights,
+            Strides{ 1, 1 },
+            CoordinateDiff{ 0, 0 },
+            CoordinateDiff{ 0, 0 },
+            Strides{ 1, 1 });
+
+    convolutionBackpropData->set_friendly_name("output");
+    ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
+    return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
+}
+
+std::shared_ptr<Function>  ConvolutionBackpropDataFunction::getReference(
+    const element::Type precision,
+    const element::Type netPrecision,
+    const Shape& inputShape,
+    const Shape& outputShape,
+    const builder::subgraph::DequantizationOperations& dequantization,
+    const std::shared_ptr<Node>& weights,
+    const builder::subgraph::DequantizationOperations& dequantizationAfter) {
+    const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
+    auto dequantizationStructure = dequantization;
+    dequantizationStructure.multiply.outPrecision = netPrecision;
+    const auto activations = makeDequantization(input, dequantizationStructure);
+
+    auto convolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
+            std::vector<element::Type>{ element::f32, element::f32 },
+            std::vector<element::Type>{ dequantizationAfter.empty() ? netPrecision : element::f32 },
+            ngraph::op::TemporaryReplaceOutputType(activations, element::f32).get(),
+            ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
+            Strides{ 1, 1 },
+            CoordinateDiff{ 0, 0 },
+            CoordinateDiff{ 0, 0 },
+            Strides{ 1, 1 });
+
+    auto dequantizationStructureAfter = dequantizationAfter;
+    dequantizationStructureAfter.multiply.outPrecision = netPrecision;
+    const auto result = makeDequantization(convolutionBackpropData, dequantizationStructureAfter);
+    result->set_friendly_name("output");
+    ngraph::ResultVector results{ std::make_shared<opset1::Result>(result) };
+    return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
+}
+
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph
diff --git a/ngraph/core/include/ngraph/op/convolution.hpp b/ngraph/core/include/ngraph/op/convolution.hpp
index c6516a5572a07f..72a365be533705 100644
--- a/ngraph/core/include/ngraph/op/convolution.hpp
+++ b/ngraph/core/include/ngraph/op/convolution.hpp
@@ -86,8 +86,8 @@ namespace ngraph
             class NGRAPH_API ConvolutionBackpropData : public Op
             {
             public:
-                static constexpr NodeTypeInfo type_info{"ConvolutionBackpropData", 1};
-                const NodeTypeInfo& get_type_info() const override { return type_info; }
+                NGRAPH_RTTI_DECLARATION;
+
                 /// \brief Constructs a batched-convolution data batch-backprop operation.
                 ConvolutionBackpropData() = default;
                 // clang-format off
diff --git a/ngraph/core/include/ngraph/op/group_conv.hpp b/ngraph/core/include/ngraph/op/group_conv.hpp
index 3c175b512976ff..ebda0392d96571 100644
--- a/ngraph/core/include/ngraph/op/group_conv.hpp
+++ b/ngraph/core/include/ngraph/op/group_conv.hpp
@@ -85,8 +85,8 @@ namespace ngraph
             class NGRAPH_API GroupConvolutionBackpropData : public Op
             {
             public:
-                static constexpr NodeTypeInfo type_info{"GroupConvolutionBackpropData", 1};
-                const NodeTypeInfo& get_type_info() const override { return type_info; }
+                NGRAPH_RTTI_DECLARATION;
+
                 /// \brief Constructs a batched-convolution data batch-backprop operation.
                 GroupConvolutionBackpropData();
                 // clang-format off
diff --git a/ngraph/core/src/op/convolution.cpp b/ngraph/core/src/op/convolution.cpp
index 667fa933046a1d..6be59d5132e7ff 100644
--- a/ngraph/core/src/op/convolution.cpp
+++ b/ngraph/core/src/op/convolution.cpp
@@ -102,12 +102,14 @@ shared_ptr<Node> op::v1::Convolution::clone_with_new_inputs(const OutputVector&
                                         m_auto_pad);
 }
 
-constexpr NodeTypeInfo op::v1::ConvolutionBackpropData::type_info;
 shared_ptr<Node> op::v1::Convolution::get_default_value() const
 {
     return ngraph::make_constant_from_string("0", get_element_type(), get_shape());
 }
 
+// *** ConvolutionBackpropData OP SET 1 ***
+NGRAPH_RTTI_DEFINITION(op::v1::ConvolutionBackpropData, "ConvolutionBackpropData", 1);
+
 op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data,
                                                          const Output<Node>& filters,
                                                          const Output<Node>& output_shape,
diff --git a/ngraph/core/src/op/group_conv.cpp b/ngraph/core/src/op/group_conv.cpp
index 4efbcae117e00a..b9d7cc4be10148 100644
--- a/ngraph/core/src/op/group_conv.cpp
+++ b/ngraph/core/src/op/group_conv.cpp
@@ -286,7 +286,7 @@ shared_ptr<Node> op::v1::GroupConvolution::clone_with_new_inputs(const OutputVec
 //                        v1::GroupConvolutionBackpropData
 //------------------------------------------------------------------------------
 
-constexpr NodeTypeInfo op::v1::GroupConvolutionBackpropData::type_info;
+NGRAPH_RTTI_DEFINITION(op::v1::GroupConvolutionBackpropData, "GroupConvolutionBackpropData", 1);
 
 op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData()
     : Op()

From 6a8f8a0761fb8c69fd554f07b9ca5ed40aec8cc6 Mon Sep 17 00:00:00 2001
From: Gabriele Galiero Casay <gabriele.galiero.casay@intel.com>
Date: Tue, 18 May 2021 06:30:10 +0200
Subject: [PATCH 06/16] Enable zero epsilon attribute in BatchNormInference
 operation (#5657)

---
 .../ops/normalization/BatchNormInference_1.md |  2 +-
 .../ops/normalization/BatchNormInference_5.md |  2 +-
 .../serialization/single_layer/batch_norm.cpp |  1 +
 .../single_layer_tests/batch_norm.cpp         |  1 +
 ngraph/core/src/op/batch_norm.cpp             |  8 ++---
 ngraph/test/type_prop/batch_norm.cpp          | 35 +++++++------------
 6 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/docs/ops/normalization/BatchNormInference_1.md b/docs/ops/normalization/BatchNormInference_1.md
index 218111575bd91d..694a9989e9f0fb 100644
--- a/docs/ops/normalization/BatchNormInference_1.md
+++ b/docs/ops/normalization/BatchNormInference_1.md
@@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values
 
 * *epsilon*
   * **Description**: *epsilon* is a constant added to the variance for numerical stability.
-  * **Range of values**: a positive floating-point number
+  * **Range of values**: a floating-point number greater than or equal to zero
   * **Type**: `float`
   * **Default value**: none
   * **Required**: *yes*
diff --git a/docs/ops/normalization/BatchNormInference_5.md b/docs/ops/normalization/BatchNormInference_5.md
index cec26e4b2ecf16..f5019d08b2d37e 100644
--- a/docs/ops/normalization/BatchNormInference_5.md
+++ b/docs/ops/normalization/BatchNormInference_5.md
@@ -58,7 +58,7 @@ For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values
 
 * *epsilon*
   * **Description**: *epsilon* is a constant added to the variance for numerical stability.
-  * **Range of values**: a positive floating-point number
+  * **Range of values**: a floating-point number greater than or equal to zero
   * **Type**: `float`
   * **Default value**: none
   * **Required**: *yes*
diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp
index 04d878727b3e10..cc1cbf7dff2a73 100644
--- a/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp
+++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/batch_norm.cpp
@@ -20,6 +20,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
 };
 
 const std::vector<double> epsilon = {
+    0.0,
     1e-6,
     1e-5,
     1e-4
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp
index cbe867e859840e..753efd3acaa84c 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_norm.cpp
@@ -15,6 +15,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
 };
 
 const std::vector<double> epsilon = {
+    0.0,
     1e-6,
     1e-5,
     1e-4
diff --git a/ngraph/core/src/op/batch_norm.cpp b/ngraph/core/src/op/batch_norm.cpp
index 1f772cf67daa3a..57a4ce7f6f7568 100644
--- a/ngraph/core/src/op/batch_norm.cpp
+++ b/ngraph/core/src/op/batch_norm.cpp
@@ -42,8 +42,8 @@ void op::v0::BatchNormInference::validate_and_infer_types()
 
     NODE_VALIDATION_CHECK(
         this,
-        m_epsilon > 0,
-        "Attribute 'epsilon' must have non-zero positive floating-point value. Got: ",
+        m_epsilon >= 0,
+        "Attribute 'epsilon' must be a floating-point value greater than or equal to zero. Got: ",
         m_epsilon);
 
     set_output_size(1);
@@ -102,8 +102,8 @@ void op::v5::BatchNormInference::validate_and_infer_types()
 
     NODE_VALIDATION_CHECK(
         this,
-        m_epsilon > 0,
-        "Attribute 'epsilon' must have non-zero positive floating-point value. Got: ",
+        m_epsilon >= 0,
+        "Attribute 'epsilon' must be a floating-point value greater than or equal to zero. Got: ",
         m_epsilon);
 
     set_output_size(1);
diff --git a/ngraph/test/type_prop/batch_norm.cpp b/ngraph/test/type_prop/batch_norm.cpp
index 13abbdf5a4f942..8a1fafd95b62e6 100644
--- a/ngraph/test/type_prop/batch_norm.cpp
+++ b/ngraph/test/type_prop/batch_norm.cpp
@@ -497,29 +497,20 @@ TYPED_TEST_P(BatchNormTest, batch_norm_inference_invalid_epsilon)
         {inputs_et, PartialShape{100}, "variance"}
     };
 
-    double eps_zero = 0.0;
     double eps_neg = -1.0;
-
-    const std::vector<BatchNormInferParams> bn_tests{
-        BatchNormInferParams{inputs_et, data_batch_shape, ch_inputs, eps_zero},
-        BatchNormInferParams{inputs_et, data_batch_shape, ch_inputs, eps_neg}
-    };
-
-    for(const auto& params : bn_tests)
+    const BatchNormInferParams params{inputs_et, data_batch_shape, ch_inputs, eps_neg};
+    try
     {
-        try
-        {
-            auto bn = makeBatchNormOp<TypeParam>(params);
-            FAIL() << "Invalid 'epsilon' attribute value not detected";
-        }
-        catch (const NodeValidationFailure& error)
-        {
-            EXPECT_HAS_SUBSTRING(error.what(), "Attribute 'epsilon' must have non-zero positive floating-point value.");
-        }
-        catch (...)
-        {
-            FAIL() << "Positive 'epsilon' attribute value check failed for unexpected reason";
-        }
+        auto bn = makeBatchNormOp<TypeParam>(params);
+        FAIL() << "Invalid 'epsilon' attribute value not detected";
+    }
+    catch (const NodeValidationFailure& error)
+    {
+        EXPECT_HAS_SUBSTRING(error.what(), "Attribute 'epsilon' must be a floating-point value greater than or equal to zero.");
+    }
+    catch (...)
+    {
+        FAIL() << "Non-negative 'epsilon' attribute value check failed for unexpected reason";
     }
 }
 
@@ -542,4 +533,4 @@ REGISTER_TYPED_TEST_CASE_P(
     batch_norm_inference_invalid_epsilon);
 
 using Types = ::testing::Types<op::v0::BatchNormInference, op::v5::BatchNormInference>;
-INSTANTIATE_TYPED_TEST_CASE_P(type_prop, BatchNormTest, Types, );
+INSTANTIATE_TYPED_TEST_CASE_P(type_prop, BatchNormTest, Types);

From 61108f1147ea9a022b25c1eaf1358268e97ecfad Mon Sep 17 00:00:00 2001
From: Maksim Derbasov <maksim.derbasov@intel.com>
Date: Tue, 18 May 2021 07:32:53 +0300
Subject: [PATCH 07/16] Fix warnings, cl compiler (#5641)

* Fix warnings

* make cpplint happy
---
 .../mkldnn_plugin/mkldnn/iml_type_mapper.cpp  |  2 +-
 .../src/mkldnn_plugin/nodes/cum_sum.cpp       |  2 +-
 .../plugin/cpu/bfloat16/bfloat16_helpers.hpp  |  2 +-
 .../include/behavior/set_preprocess.hpp       | 20 +++++++++----------
 .../shared/src/behavior/stress_tests.cpp      |  2 +-
 .../common_test_utils/ngraph_test_utils.cpp   |  3 ---
 .../common_test_utils/ngraph_test_utils.hpp   |  8 +++++++-
 ngraph/core/include/ngraph/op/constant.hpp    | 18 +++++++++--------
 8 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
index bdc6795b13731f..c7278944df0cba 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
@@ -9,7 +9,7 @@ using namespace MKLDNNPlugin;
 impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
     impl_desc_type res = impl_desc_type::unknown;
 
-#define REPLACE_WORD(_wrd, _sub) int pos = impl_desc_name.find(#_wrd); \
+#define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \
     if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub);
 
     REPLACE_WORD(simple, ref);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
index 2ed69db46b1892..8940527713cd36 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
@@ -159,7 +159,7 @@ class CumSumImpl: public ExtLayerBase {
             for (size_t iwork = start; iwork < end; ++iwork) {
                 std::vector<size_t> forStartOffset(numOfDims);
                 forStartOffset[axis] = 0;
-                for (int64_t offsetIdx = 0, countersIdx = 0; offsetIdx < numOfDims; ++offsetIdx) {
+                for (size_t offsetIdx = 0, countersIdx = 0; offsetIdx < numOfDims; ++offsetIdx) {
                     if (offsetIdx == axis) {
                         continue;
                     }
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp
index 5fbe3d6c89be2c..2e0d4025b41dca 100644
--- a/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp
+++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp
@@ -143,7 +143,7 @@ class BasicBF16Test : public testing::WithParamInterface<basicParams>,
     InferenceEngine::SizeVector inputShapes, newInputShapes;
     InferenceEngine::Precision inputPrecision, netPrecision;
     std::map<std::string, std::string> expectedPrecisions;
-    float threshold = 2e-2;  // Is enough for tensor having abs maximum values less than 1
+    float threshold = 2e-2f;  // Is enough for tensor having abs maximum values less than 1
 
     static std::string getTestCaseName(testing::TestParamInfo<basicParams> obj) {
         InferenceEngine::Precision inputPrecision, netPrecision;
diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp
index c498d7963e36b7..0f1704601e7baa 100644
--- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp
@@ -111,7 +111,7 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessGetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -182,7 +182,7 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessSetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -243,7 +243,7 @@ TEST_P(PreprocessTest, SetMeanValuePreProcessGetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -308,7 +308,7 @@ TEST_P(PreprocessTest, SetMeanValuePreProcessSetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -363,7 +363,7 @@ TEST_P(PreprocessTest, ReverseInputChannelsPreProcessGetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -430,7 +430,7 @@ TEST_P(PreprocessTest, ReverseInputChannelsPreProcessSetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -500,7 +500,7 @@ TEST_P(PreprocessTest, SetScalePreProcessGetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -566,7 +566,7 @@ TEST_P(PreprocessTest, SetScalePreProcessSetBlob) {
         auto lockedMem = inBlob->buffer();
         auto *inData = lockedMem.as<float*>();
         for (size_t i = 0; i < inBlob->size(); i++)
-            inData[i] = i;
+            inData[i] = static_cast<float>(i);
     }
 
     req.Infer();
@@ -721,11 +721,11 @@ TEST_P(PreprocessConversionTest, Infer) {
         if (iPrecision == InferenceEngine::Precision::FP32) {
             auto *inData = lockedMem.as<float*>();
             for (size_t i = 0; i < inBlob->size(); i++)
-                inData[desc.offset(i)] = i;
+                inData[desc.offset(i)] = static_cast<float>(i);
         } else if (iPrecision == InferenceEngine::Precision::U8) {
             auto *inData = lockedMem.as<std::uint8_t*>();
             for (size_t i = 0; i < inBlob->size(); i++)
-                inData[desc.offset(i)] = i;
+                inData[desc.offset(i)] = static_cast<std::uint8_t>(i);
         } else {
             ASSERT_TRUE(false);
         }
diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp
index 31e07b5c962edf..df964b76887eec 100644
--- a/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/behavior/stress_tests.cpp
@@ -30,7 +30,7 @@ TEST_P(MultipleAllocations, InferWorksCorrectAfterAllocations) {
     auto ie = PluginCache::get().ie();
 
     std::cout << "Load the network " << m_allocationsCount << " times..." << std::flush;
-    for (int i = 0; i < m_allocationsCount; ++i) {
+    for (unsigned int i = 0; i < m_allocationsCount; ++i) {
         ie->LoadNetwork(cnnNet, targetDevice, configuration);
     }
 
diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp
index fdfc95c0226e0c..0189de6a543fc2 100644
--- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp
+++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp
@@ -22,9 +22,6 @@
 #include <ngraph/op/util/sub_graph_base.hpp>
 #include <ngraph/opsets/opset1.hpp>
 #include <ngraph/pass/visualize_tree.hpp>
-
-#include "details/ie_exception.hpp"
-
 namespace {
 inline namespace tools {
 bool isTypeRelaxed(const std::string &type) {
diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp
index e58fa5edb9f455..929a0e5452fa70 100644
--- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp
+++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.hpp
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <limits>
 #include <memory>
 #include <queue>
 
@@ -13,6 +14,7 @@
 #include <ngraph/pass/pass.hpp>
 #include <ngraph/opsets/opset6.hpp>
 
+#include "ie_common.h"
 #include "test_common.hpp"
 
 #define DYN ngraph::Dimension::dynamic()
@@ -569,7 +571,11 @@ struct Equal<uint8_t*> {
         if (lhs_bit_size != rhs_bit_size) return false;
 
         for (size_t bit_idx = 0; bit_idx < lhs_bit_size; bit_idx++) {
-            const uint8_t byte_idx = bit_idx / BITS_IN_BYTE_COUNT;
+            const auto byte_idx_result(bit_idx / BITS_IN_BYTE_COUNT);
+            if (byte_idx_result > std::numeric_limits<uint8_t>::max())
+                IE_THROW() << "(bit_idx / BITS_IN_BYTE_COUNT) bigger than uint8_t::max_value";
+
+            const uint8_t byte_idx(static_cast<uint8_t>(byte_idx_result));
             const uint8_t bit_in_byte_idx = 7 - (bit_idx % BITS_IN_BYTE_COUNT);
 
             if (extract_bit(lhs[byte_idx], bit_in_byte_idx) !=
diff --git a/ngraph/core/include/ngraph/op/constant.hpp b/ngraph/core/include/ngraph/op/constant.hpp
index 194d9d205df4cc..650651a37a7059 100644
--- a/ngraph/core/include/ngraph/op/constant.hpp
+++ b/ngraph/core/include/ngraph/op/constant.hpp
@@ -646,24 +646,26 @@ namespace ngraph
                 }
                 template <
                     ngraph::element::Type_t Type,
+                    typename ValueT,
                     typename std::enable_if<Type == ngraph::element::Type_t::u4, bool>::type = true>
-                static ngraph::fundamental_type_for<Type>
-                    value_in_range(const ngraph::fundamental_type_for<Type>& value)
+                static ngraph::fundamental_type_for<Type> value_in_range(const ValueT& value)
                 {
-                    NGRAPH_CHECK(0 <= value && value <= 15,
+                    const auto result = ngraph::fundamental_type_for<Type>(value);
+                    NGRAPH_CHECK(0 <= result && result <= 15,
                                  "assigned value out of range u4 values");
-                    return value;
+                    return result;
                 }
 
                 template <
                     ngraph::element::Type_t Type,
+                    typename ValueT,
                     typename std::enable_if<Type == ngraph::element::Type_t::i4, bool>::type = true>
-                static ngraph::fundamental_type_for<Type>
-                    value_in_range(const ngraph::fundamental_type_for<Type>& value)
+                static ngraph::fundamental_type_for<Type> value_in_range(const ValueT& value)
                 {
-                    NGRAPH_CHECK(-8 <= value && value <= 7,
+                    const auto result = ngraph::fundamental_type_for<Type>(value);
+                    NGRAPH_CHECK(-8 <= result && result <= 7,
                                  "assigned value out of range i4 values");
-                    return value;
+                    return result;
                 }
 
                 bool are_all_data_elements_bitwise_identical() const;

From 3dbf4b340b2013095e485070ebb8f8e2f64140f5 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Date: Tue, 18 May 2021 11:51:26 +0300
Subject: [PATCH 08/16] Use extern template instantiation only for Clang (RTTI
 mess) (#5647)

* Remove extern template from headers for RTTI classes

* Moove instantiation out of the namespace

* Use __ANDROID__ conditional compilation for TBlob

* One more attempt
---
 inference-engine/include/ie_blob.h            |  2 ++
 .../src/inference_engine/ie_common.cpp        | 28 +++++++++----------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie_blob.h
index dbe264c054de7f..db7c29c950877c 100644
--- a/inference-engine/include/ie_blob.h
+++ b/inference-engine/include/ie_blob.h
@@ -799,6 +799,7 @@ class TBlob : public MemoryBlob {
     }
 };
 
+#ifdef __clang__
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<float>);
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<double>);
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<int8_t>);
@@ -813,6 +814,7 @@ extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<unsigned
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<unsigned long long>);
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<bool>);
 extern template class INFERENCE_ENGINE_API_CLASS(InferenceEngine::TBlob<char>);
+#endif  // __clang__
 
 /**
  * @brief Creates a blob with the given tensor descriptor.
diff --git a/inference-engine/src/inference_engine/ie_common.cpp b/inference-engine/src/inference_engine/ie_common.cpp
index c10c7a6c7bc3e4..effee536d59993 100644
--- a/inference-engine/src/inference_engine/ie_common.cpp
+++ b/inference-engine/src/inference_engine/ie_common.cpp
@@ -124,19 +124,19 @@ TBlob<T, U>::~TBlob() {
     free();
 }
 
-template class TBlob<float>;
-template class TBlob<double>;
-template class TBlob<int8_t>;
-template class TBlob<uint8_t>;
-template class TBlob<int16_t>;
-template class TBlob<uint16_t>;
-template class TBlob<int32_t>;
-template class TBlob<uint32_t>;
-template class TBlob<long>;
-template class TBlob<long long>;
-template class TBlob<unsigned long>;
-template class TBlob<unsigned long long>;
-template class TBlob<bool>;
-template class TBlob<char>;
+template class INFERENCE_ENGINE_API_CLASS(TBlob<float>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<double>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<int8_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<uint8_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<int16_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<uint16_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<int32_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<uint32_t>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<long>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<long long>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<unsigned long>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<unsigned long long>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<bool>);
+template class INFERENCE_ENGINE_API_CLASS(TBlob<char>);
 
 }  // namespace InferenceEngine

From bcbc3fe8cdbb67038f536e6751642115239ce4c0 Mon Sep 17 00:00:00 2001
From: Sergey Lyubimtsev <sergey.lyubimtsev@intel.com>
Date: Tue, 18 May 2021 12:25:37 +0300
Subject: [PATCH 09/16] Update EULA link for PyPI install guides (#5452)

---
 docs/install_guides/pypi-openvino-dev.md | 4 ++--
 docs/install_guides/pypi-openvino-rt.md  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md
index 7164c2cdf555c8..8d7b2f037847e2 100644
--- a/docs/install_guides/pypi-openvino-dev.md
+++ b/docs/install_guides/pypi-openvino-dev.md
@@ -1,7 +1,7 @@
 # Intel® Distribution of OpenVINO™ Toolkit Developer Package 
-
+Copyright © 2018-2021 Intel Corporation
 > **LEGAL NOTICE**: Your use of this software and any required dependent software (the
-“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or
+“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or
 license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details.
 
 ## Introduction
diff --git a/docs/install_guides/pypi-openvino-rt.md b/docs/install_guides/pypi-openvino-rt.md
index cfe95281fdfc30..00a96085cbadb3 100644
--- a/docs/install_guides/pypi-openvino-rt.md
+++ b/docs/install_guides/pypi-openvino-rt.md
@@ -1,7 +1,7 @@
 # Intel® Distribution of OpenVINO™ Toolkit Runtime Package
-
+Copyright © 2018-2021 Intel Corporation
 > **LEGAL NOTICE**: Your use of this software and any required dependent software (the
-“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products) for the Software Package, which may also include notices, disclaimers, or
+“Software Package”) is subject to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf) for the Software Package, which may also include notices, disclaimers, or
 license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the “third-party-programs.txt” or other similarly-named text file included with the Software Package for additional details.
 
 ## Introduction

From b835c6e60a9887a973e409bf70337067a9737a98 Mon Sep 17 00:00:00 2001
From: Sergey Lyubimtsev <sergey.lyubimtsev@intel.com>
Date: Tue, 18 May 2021 12:28:42 +0300
Subject: [PATCH 10/16] Added Troubleshooting section (#5625)

* Added Troubleshooting section

* correct gamma
---
 docs/install_guides/pypi-openvino-dev.md | 27 ++++++++++++++++--------
 docs/install_guides/pypi-openvino-rt.md  | 27 ++++++++++++++++--------
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md
index 8d7b2f037847e2..f04bdf3a21f8c0 100644
--- a/docs/install_guides/pypi-openvino-dev.md
+++ b/docs/install_guides/pypi-openvino-dev.md
@@ -40,11 +40,7 @@ The table below lists the supported operating systems and Python* versions requi
 
 ## Install the Developer Package
 
-### Step 1. Install External Software Dependencies
-
-On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
-
-### Step 2. Set Up Python Virtual Environment
+### Step 1. Set Up Python Virtual Environment
 
 To avoid dependency conflicts, use a virtual environment. Skip this
    step only if you do want to install all dependencies globally.
@@ -62,7 +58,7 @@ On Windows:
 python -m venv openvino_env
 ```
 
-### Step 3. Activate Virtual Environment
+### Step 2. Activate Virtual Environment
 
 On Linux and macOS:
 ```sh
@@ -73,14 +69,14 @@ On Windows:
 openvino_env\Scripts\activate
 ```
 
-### Step 4. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update PIP to the Highest Version
 
 Run the command below:
 ```sh
 python -m pip install --upgrade pip
 ```
 
-### Step 5. Install the Package
+### Step 4. Install the Package
 
 Run the command below: <br>
 
@@ -88,7 +84,7 @@ Run the command below: <br>
    pip install openvino-dev
    ```
 
-### Step 6. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
 
 Run the command below (this may take a few seconds):
 ```sh
@@ -97,6 +93,19 @@ pot -h
 
 You will see the help message for Post-Training Optimization Tool if installation finished successfully.
 
+## Troubleshooting
+
+#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio"
+
+On Windows* some dependencies may require compilation from source when installing. To resolve this issue, you need to install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat package installation.
+
+#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
+
+To resolve missing external dependency on Ubuntu*, execute the following command:
+```sh
+sudo apt-get install libpython3.7
+```
+
 ## Additional Resources
 
 - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
diff --git a/docs/install_guides/pypi-openvino-rt.md b/docs/install_guides/pypi-openvino-rt.md
index 00a96085cbadb3..6e22d74157cd28 100644
--- a/docs/install_guides/pypi-openvino-rt.md
+++ b/docs/install_guides/pypi-openvino-rt.md
@@ -37,11 +37,7 @@ The table below lists supported operating systems and Python* versions required
 
 ## Install the Runtime Package
 
-### Step 1. Install External Software Dependencies
-
-On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
-
-### Step 2. Set Up Python Virtual Environment
+### Step 1. Set Up Python Virtual Environment
 
 To avoid dependency conflicts, use a virtual environment. Skip this
    step only if you do want to install all dependencies globally.
@@ -55,7 +51,7 @@ python -m venv openvino_env
 > **NOTE**: On Linux and macOS, you may need to type `python3` instead of
 `python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
 
-### Step 3. Activate Virtual Environment
+### Step 2. Activate Virtual Environment
 
 On Linux and macOS:
 ```sh
@@ -66,14 +62,14 @@ On Windows:
 openvino_env\Scripts\activate
 ```
 
-### Step 4. Set Up and Update pip to the Highest Version
+### Step 3. Set Up and Update PIP to the Highest Version
 
 Run the command below:
 ```sh
 python -m pip install --upgrade pip
 ```
 
-### Step 5. Install the Package
+### Step 4. Install the Package
 
 Run the command below: <br>
 
@@ -81,7 +77,7 @@ Run the command below: <br>
    pip install openvino
    ```
 
-### Step 6. Verify that the Package is Installed
+### Step 5. Verify that the Package is Installed
 
 Run the command below:
 ```sh
@@ -90,6 +86,19 @@ python -c "from openvino.inference_engine import IECore"
    
 You will not see any error messages if installation finished successfully.
 
+## Troubleshooting
+
+#### Error: Microsoft Visual C++ 14.0 is required. Get it with "Build Tools for Visual Studio"
+
+On Windows* some dependencies may require compilation from source when installing. To resolve this issue, you need to install [Build Tools for Visual Studio* 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) and repeat package installation.
+
+#### ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
+
+To resolve missing external dependency on Ubuntu*, execute the following command:
+```sh
+sudo apt-get install libpython3.7
+```
+
 ## Additional Resources
 
 - [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).

From 8be740a5aa4de04897a96f8d97554fe31bca7350 Mon Sep 17 00:00:00 2001
From: Gleb Kazantaev <gleb.kazantaev@intel.com>
Date: Tue, 18 May 2021 12:48:01 +0300
Subject: [PATCH 11/16] Enable Pruning transformation by default inside Model
 Optimizer (#5633)

---
 .../offline_transformations/src/moc_transformations.cpp   | 8 +++++++-
 model-optimizer/mo/back/offline_transformations.py        | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp
index eec2491a28edbf..35f4a575c15c74 100644
--- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp
+++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp
@@ -5,10 +5,16 @@
 #include <memory>
 
 #include "moc_transformations.hpp"
+#include "pruning.hpp"
 
+#include <ngraph/pass/manager.hpp>
 
 NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
 
-bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function>) {
+bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    ngraph::pass::Manager m(get_pass_config());
+    m.register_pass<Pruning>();
+    m.run_passes(f);
+
     return false;
 }
\ No newline at end of file
diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py
index ee8905356acd1a..a363a1ca250756 100644
--- a/model-optimizer/mo/back/offline_transformations.py
+++ b/model-optimizer/mo/back/offline_transformations.py
@@ -35,6 +35,7 @@ def apply_offline_transformations(input_model: str, framework: str, transforms:
 
         available_transformations[name](net, **args)
 
+    ApplyMOCTransformations(net, False)
     net.serialize(input_model + ".xml", input_model + ".bin")
     path_to_mapping = input_model + ".mapping"
     GenerateMappingFile(net, path_to_mapping.encode('utf-8'), extract_names)

From b1d1f9287f3948a0110f19c84bddef9909047625 Mon Sep 17 00:00:00 2001
From: Roman Kazantsev <roman.kazantsev@intel.com>
Date: Tue, 18 May 2021 13:17:50 +0300
Subject: [PATCH 12/16] Implement nGraph transformation to decompose Einsum-7
 operation (#5529)

* Implement nGraph transformation to decompose Einsum-7 operation

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Use MatMul instead of Eltwise-multiplication and ReduceSum

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Add description for new methods

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Fix code style

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Fix code style #2

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Remove unused variables.py

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Apply feedback after review: fix comments, new_register_node use

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Add Reshape if needed and apply code-review feedback

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Fix code-style

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Remove unused variable

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>
---
 .../op_conversions/einsum_decomposition.hpp   |  28 +
 .../common_optimizations.cpp                  |   2 +
 .../op_conversions/einsum_decomposition.cpp   | 683 ++++++++++++++++++
 .../ngraph_reader/einsum_tests.cpp            |  21 +-
 ngraph/core/include/ngraph/op/einsum.hpp      |   6 +
 5 files changed, 731 insertions(+), 9 deletions(-)
 create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp

diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp
new file mode 100644
index 00000000000000..68281a94b75b82
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/einsum_decomposition.hpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <transformations_visibility.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class TRANSFORMATIONS_API EinsumDecomposition;
+
+}  // namespace pass
+}  // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief EinsumDecomposition transformation decomposes Einsum-7 operation into a sub-graph with more simple operations:
+ *        Transpose, Reshape, MatMul, ReduceSum, Unsqueeze, ShapeOf, ReduceProd, StridedSlice, and Concat
+ */
+class ngraph::pass::EinsumDecomposition : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    EinsumDecomposition();
+};
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index b8aaa7d09ef201..bd44380f6275d3 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -58,6 +58,7 @@
 #include "transformations/op_conversions/convert_gelu.hpp"
 #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
 #include "transformations/op_conversions/batch_norm_decomposition.hpp"
+#include "transformations/op_conversions/einsum_decomposition.hpp"
 #include "transformations/op_conversions/gelu7_downgrade.hpp"
 #include "transformations/op_conversions/reduce_l1_decomposition.hpp"
 #include "transformations/op_conversions/reduce_l2_decomposition.hpp"
@@ -146,6 +147,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
     decomp->add_matcher<ngraph::pass::BatchNormDecomposition>();
     decomp->add_matcher<ngraph::pass::MVN6Decomposition>();
     decomp->add_matcher<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
+    decomp->add_matcher<ngraph::pass::EinsumDecomposition>();
     decomp->set_name("ngraph::pass::CommonDecompositions");
 
     // CF is required after all decompositions
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp
new file mode 100644
index 00000000000000..e715d76c0363e2
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/einsum_decomposition.cpp
@@ -0,0 +1,683 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/einsum_decomposition.hpp"
+
+#include <memory>
+#include <ngraph/opsets/opset7.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
+#include <ngraph/validation_util.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "itt.hpp"
+
+namespace {
+/// \brief      Check if the EinsumDecomposition transformation is applicable to a given Einsum.
+/// The transformation is applicable if input subscript does not have repeated labels and ellipsis.
+///
+/// \param      subscript          A subscript to check its format
+///
+/// \return     true - applicable, false - not applicable
+///
+bool is_subscript_applicable(const std::string& subscript) {
+    auto labels = ngraph::opset7::Einsum::extract_labels(subscript);
+    auto unique_labels = std::unordered_set<std::string>(labels.begin(), labels.end());
+    return std::find(labels.begin(), labels.end(), "...") == labels.end() && unique_labels.size() == labels.size();
+}
+
+/// \brief      Compute einsum_path for a given Einsum node meaning that the (pseudo-)optimal
+/// order of operands contraction in terms of performance and memory consumption
+///
+/// \param      einsum_node         An input Einsum node
+///
+/// \return     a vector of pairs with input indices assuming that the intermediate result is
+/// appended in the tail
+///
+std::vector<std::pair<size_t, size_t>> compute_einsum_path(std::shared_ptr<const ngraph::opset7::Einsum> einsum_node) {
+    // TODO: implement algorithm for finding (pseudo-)optimal einsum_path
+    std::vector<std::pair<size_t, size_t>> einsum_path;
+    const size_t num_inputs = einsum_node->get_input_size();
+    NGRAPH_CHECK(num_inputs > 0);
+    for (size_t input_ind = num_inputs - 1; input_ind > 0; --input_ind) {
+        einsum_path.push_back(std::make_pair(0, input_ind));
+    }
+    return einsum_path;
+}
+
+/// \brief      Check if the dimension with a given label is reduced. The dimension is reduced
+/// if the corresponding label is met in neither the output subscript nor the input subscripts
+/// excluding ones specified by a vector excluded_indices
+///
+/// \param      input_subscripts         The vector of the input subscripts
+/// \param      output_subscript         The output subscript
+/// \param      label_to_check           A label that corresponds to dimension to check
+/// \param      excluded_indices         A vector of input subscript indices to be excluded
+///
+/// \return     true - a dimension to reduce, false - otherwise
+///
+bool is_dimension_reduced(const std::vector<std::string>& input_subscripts, const std::string& output_subscript,
+    const std::string label_to_check, const std::vector<size_t>& excluded_indices) {
+    for (size_t input_ind = 0; input_ind < input_subscripts.size(); ++input_ind) {
+        const auto& input_subscript = input_subscripts[input_ind];
+        // the subscript is checked only if its index is not in excluded indices list
+        bool check_subscript = (std::find(excluded_indices.begin(), excluded_indices.end(), input_ind) == excluded_indices.end());
+        if (check_subscript && input_subscript.find(label_to_check) != std::string::npos) {
+            return false;
+        }
+    }
+    return output_subscript.find(label_to_check) == std::string::npos;
+}
+
+/// \brief    Checks if input vector represents a range [0; n]
+///
+/// \param    labels_inds    Input vector to check
+///
+/// \return   true - the input vector is a range [0; n]; false - otherwise
+///
+bool is_range_0_to_n(const std::vector<int64_t> &labels_inds) {
+    int64_t check_index = 0;
+    for (auto index : labels_inds) {
+        if (check_index != index) {
+            return false;
+        }
+        ++check_index;
+    }
+    return true;
+}
+
+/// \brief      Generate an input subscript that provides to group dimensions into the common,
+/// separate and reduced dimensions after transpose
+///
+/// \param      input_subscripts         A vector of the input subscripts
+/// \param      common_labels_inds       A vector of indices of the common dimensions
+/// \param      separate_labels_inds     A vector of indices of the separate dimensions
+/// \param      reduced_labels_inds      A vector of indices of the reduced dimensions
+/// \param      is_separate_first        A boolean flag. It is true if the separate dimensions
+/// goes before the reduced dimensions
+///
+/// \return     An input subscript for grouping dimensions
+///
+std::string generate_grouping_subscript(const std::string& input_subscript, const std::vector<int64_t>& common_labels_inds,
+                                        const std::vector<int64_t>& separate_labels_inds, const std::vector<int64_t>& reduced_labels_inds,
+                                        bool& is_separate_first) {
+    // transpose is not needed if common labels, reduced labels
+    // and separate labels indices go concurrently
+    std::vector<int64_t> labels_inds = common_labels_inds;
+    labels_inds.insert(labels_inds.end(), reduced_labels_inds.begin(), reduced_labels_inds.end());
+    labels_inds.insert(labels_inds.end(), separate_labels_inds.begin(), separate_labels_inds.end());
+    if (is_range_0_to_n(labels_inds)) {
+        is_separate_first = false;
+        return input_subscript;
+    }
+
+    // transpose is not needed if common labels, separate labels
+    // and reduced labels indices go concurrently
+    labels_inds = common_labels_inds;
+    labels_inds.insert(labels_inds.end(), separate_labels_inds.begin(), separate_labels_inds.end());
+    labels_inds.insert(labels_inds.end(), reduced_labels_inds.begin(), reduced_labels_inds.end());
+    if (is_range_0_to_n(labels_inds)) {
+        is_separate_first = true;
+        return input_subscript;
+    }
+
+    auto labels = ngraph::opset7::Einsum::extract_labels(input_subscript);
+    std::string required_subscript = "";
+    for (auto index : labels_inds) {
+        required_subscript += labels[index];
+    }
+    is_separate_first = true;
+    return required_subscript;
+}
+
+/// \brief      Update a vector of input nodes and subscripts by removing items for operands
+/// with indices input_ind1 and input_ind2 and inserted new input node and the corresponsing
+/// subscript in the tail
+///
+/// \param      input_nodes         A vector of the input nodes to update
+/// \param      input_subscripts    A vector of the input subscripts to update
+/// \param      input_ind1          An index of item to be removed
+/// \param      input_ind2          An index of item to be removed
+/// \param      new_node            New input node to be inserted in the tail
+/// \param      new_subscript       New input subscript to be inserted in the tail
+///
+void update_operands(ngraph::OutputVector& input_nodes, std::vector<std::string>& input_subscripts, size_t input_ind1, size_t input_ind2,
+                     const ngraph::Output<ngraph::Node>& new_node, const std::string& new_subscript) {
+    NGRAPH_CHECK(input_ind1 < input_ind2);
+    NGRAPH_CHECK(input_ind2 < input_nodes.size());
+    NGRAPH_CHECK(input_ind2 < input_subscripts.size());
+    input_nodes.erase(input_nodes.begin() + input_ind2);
+    input_nodes.erase(input_nodes.begin() + input_ind1);
+    input_nodes.push_back(new_node);
+    input_subscripts.erase(input_subscripts.begin() + input_ind2);
+    input_subscripts.erase(input_subscripts.begin() + input_ind1);
+    input_subscripts.push_back(new_subscript);
+}
+
+/// \brief      Return input node with computed sub-shape defined by a range [s_begin;s_end)
+///
+/// \param      data_shape          Input node that contains some tensor shape
+/// \param      s_begin             Start index of dimension
+/// \param      s_end               End index of dimension
+/// \param      subgraph_nodes      A vector of operation nodes where to add new ones
+/// \param      is_product          A boolean flag that indicates if to compute a product of
+/// dimension sizes in the computed sub-shape
+///
+/// \return     A vector of input nodes that can be empty (if s_end <= s_begin)
+/// or contains just one input node with sub-shape or its product
+///
+ngraph::OutputVector compute_sub_shape(const ngraph::Output<ngraph::Node>& data_shape, size_t s_begin, size_t s_end, ngraph::NodeVector& subgraph_nodes,
+                                       bool is_product = false) {
+    int64_t begin = static_cast<int64_t>(s_begin);
+    int64_t end = static_cast<int64_t>(s_end);
+    ngraph::OutputVector sub_shape_vector;
+    if (end <= begin) {
+        return sub_shape_vector;
+    }
+    std::vector<int64_t> begin_mask(1, 0);
+    std::vector<int64_t> end_mask(1, 0);
+    auto begin_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {begin});
+    auto end_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {end});
+    auto stride_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {1});
+    auto sub_shape = std::make_shared<ngraph::opset7::StridedSlice>(data_shape, begin_const, end_const, begin_mask, end_mask);
+
+    if (is_product) {
+        auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0});
+        auto separate_shape_prod = std::make_shared<ngraph::opset7::ReduceProd>(sub_shape->output(0), reduce_axis_const, true);
+        sub_shape_vector.push_back(separate_shape_prod->output(0));
+        subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod});
+    } else {
+        sub_shape_vector.push_back(sub_shape->output(0));
+    }
+    subgraph_nodes.insert(subgraph_nodes.end(), {begin_const, end_const, stride_const, sub_shape});
+    return sub_shape_vector;
+}
+
+/// \brief      Unsqueeze input node by given dimensions if a vector of unsqueezing dimensions
+/// is not empty
+///
+/// \param      input_node          Input node to unsqueeze
+/// \param      unsqueeze_axes      A vector of dimensions to be unsqueezed
+/// \param      subgraph_nodes      A vector of operation nodes that is included into a
+/// sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+/// \return     Unsqueezed input node if a vector of unsqueezing dimensions is not empty,
+/// otherwise, the original input node
+///
+ngraph::Output<ngraph::Node> unsqueeze_input(const ngraph::Output<ngraph::Node>& input_node, const std::vector<int64_t>& unsqueeze_axes,
+                                             ngraph::NodeVector& subgraph_nodes) {
+    if (unsqueeze_axes.empty()) {
+        return input_node;
+    }
+    auto unsqueeze_axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {unsqueeze_axes.size()}, unsqueeze_axes);
+    auto unsqueeze = std::make_shared<ngraph::opset7::Unsqueeze>(input_node, unsqueeze_axes_const);
+    subgraph_nodes.insert(subgraph_nodes.end(), {unsqueeze_axes_const, unsqueeze});
+    return unsqueeze->output(0);
+}
+
+/// \brief      Reshape input node to the new shape specified by sub-shapes of the common,
+/// separate and reduced dimensions so that the reshaped input has a format acceptable by MatMul
+///
+/// \param      input_node              Input node to reshape
+/// \param      common_sub_shape        A sub-shape corresponding to the common dimensions
+/// \param      separate_sub_shape      A sub-shape corresponding to the separate dimensions
+/// \param      reduced_sub_shape_prod  A product of the separate dimensions sizes
+/// \param      is_separate_first       true - the separate dimensions placed before reduced
+/// dimensions, otherwise, it is after them
+/// \param      subgraph_nodes          A vector of operation nodes that is included into
+/// a sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+/// \return     Reshaped input node
+///
+ngraph::Output<ngraph::Node> reshape_input_for_matmul(const ngraph::Output<ngraph::Node>& input_node, const ngraph::OutputVector& common_sub_shape,
+                                                      const ngraph::OutputVector& separate_sub_shape, const ngraph::OutputVector& reduced_sub_shape_prod,
+                                                      bool is_separate_first, ngraph::NodeVector& subgraph_nodes) {
+    ngraph::OutputVector new_shape_parts;
+    new_shape_parts.insert(new_shape_parts.end(), common_sub_shape.begin(), common_sub_shape.end());
+
+    // compute a product of a sub-shape for separate labels
+    ngraph::OutputVector separate_parts;
+    if (common_sub_shape.size() > 0 && separate_sub_shape.size() == 0) {
+        // in this case new dimension corresponding to separate labels must be added
+        // since MatMul operation is not possible to do without separate dimensions if the
+        // common dimension presents
+        auto separate_new_dim = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {1});
+        separate_parts.push_back(separate_new_dim);
+        subgraph_nodes.insert(subgraph_nodes.end(), {separate_new_dim});
+    } else if (separate_sub_shape.size() > 0) {
+        // in this case compute a product of separate dimension sizes since they must be
+        // presented with just one dimension for MatMul
+        auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0});
+        auto separate_shape_prod = std::make_shared<ngraph::opset7::ReduceProd>(separate_sub_shape[0], reduce_axis_const, true);
+        separate_parts.push_back(separate_shape_prod->output(0));
+        subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod});
+    }
+
+    // form a new shape for input so that collapsed dimensions corresponding
+    // to the common, separate and reduced dimensions are placed in the correct order
+    if (is_separate_first) {
+        new_shape_parts.insert(new_shape_parts.end(), separate_parts.begin(), separate_parts.end());
+        new_shape_parts.insert(new_shape_parts.end(), reduced_sub_shape_prod.begin(), reduced_sub_shape_prod.end());
+    } else {
+        new_shape_parts.insert(new_shape_parts.end(), reduced_sub_shape_prod.begin(), reduced_sub_shape_prod.end());
+        new_shape_parts.insert(new_shape_parts.end(), separate_parts.begin(), separate_parts.end());
+    }
+
+    // in case of scalar reshape is not needed
+    if (new_shape_parts.size() == 0) {
+        return input_node;
+    }
+
+    auto new_shape_op = std::make_shared<ngraph::opset7::Concat>(new_shape_parts, 0);
+
+    // if new shape is possible to compute on the shape infer stage, insert Constant node immediatelly
+    // in order to prevent repeated computing during constant-folding pass
+    std::shared_ptr<ngraph::opset7::Reshape> reshaped_input_op;
+    if (auto new_shape_const = ngraph::get_constant_from_source(new_shape_op)) {
+        reshaped_input_op = std::make_shared<ngraph::opset7::Reshape>(input_node, new_shape_const, false);
+        subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_const});
+    } else {
+        reshaped_input_op = std::make_shared<ngraph::opset7::Reshape>(input_node, new_shape_op->output(0), false);
+        subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_op});
+    }
+
+    subgraph_nodes.insert(subgraph_nodes.end(), {reshaped_input_op});
+    return reshaped_input_op->output(0);
+}
+
+/// \brief      Transpose one of the Einsum inputs to layout specified through the required
+/// subscript
+///
+/// \param      input_nodes         A vector of input nodes to Einsum
+/// \param      input_subscripts    A vector of corresponding subscripts for input nodes
+/// \param      required_subscript  The required subscript that defines layout to which the
+/// input is to transpose
+/// \param      input_ind           An index of the input node to be transposed
+/// \param      subgraph_nodes      A vector of operation nodes that is included into
+/// a sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+void transpose_input(ngraph::OutputVector& input_nodes, std::vector<std::string>& input_subscripts, const std::string& required_subscript, size_t input_ind,
+                     ngraph::NodeVector& subgraph_nodes) {
+    // perform sanity check for arguments
+    auto num_inputs = input_nodes.size();
+    NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript.");
+    NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range.");
+
+    // generate permutation vector by searching for bijection between input_subscripts
+    // and required_subscript
+    std::vector<int64_t> permutation;
+    const auto& input_subscript = input_subscripts[input_ind];
+
+    // transpose is not needed since the input subscript is not going to be changed
+    if (required_subscript == input_subscript) {
+        return;
+    }
+
+    // find permutation that establishes bijection between the input subscript
+    // and the required one
+    auto labels = ngraph::opset7::Einsum::extract_labels(input_subscript);
+    auto required_labels = ngraph::opset7::Einsum::extract_labels(required_subscript);
+    NGRAPH_CHECK(labels.size() == required_labels.size());
+    for (const auto& required_label : required_labels) {
+        auto it = std::find(labels.begin(), labels.end(), required_label);
+        NGRAPH_CHECK(it != labels.end());
+        int64_t found_index = static_cast<int64_t>(it - labels.begin());
+        permutation.push_back(found_index);
+    }
+
+    // create a sub-graph for transposing into the required layout
+    const auto& input_node = input_nodes[input_ind];
+    auto permutation_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {permutation.size()}, permutation);
+    auto transpose = std::make_shared<ngraph::opset7::Transpose>(input_node, permutation_const);
+
+    // update a vector of inputs and input subscripts
+    input_nodes[input_ind] = transpose->output(0);
+    input_subscripts[input_ind] = required_subscript;
+
+    // update a vector of nodes for copy_runtime_info
+    subgraph_nodes.insert(subgraph_nodes.end(), {permutation_const, transpose});
+}
+
+/// \brief      Find labels (in a given input subscript) that are met once in the equation
+/// and reduce dimensions corresponding to such labels
+///
+/// \param      einsum_decompose_ptr    A pointer to Einsum decomposing pass
+/// \param      input_nodes             A vector of input nodes to Einsum operation
+/// \param      input_subscripts        A vector of corresponding subscripts for the input nodes
+/// \param      output_subscript        The output subscript
+/// \param      input_ind               An index of the input node for which it will check
+/// dimensions to be reduced
+/// \param      subgraph_nodes          A vector of operation nodes that is included into
+/// a sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+void reduce_input(ngraph::pass::EinsumDecomposition *einsum_decompose_ptr,
+    ngraph::OutputVector& input_nodes, std::vector<std::string>& input_subscripts,
+    const std::string& output_subscript, size_t input_ind, ngraph::NodeVector& subgraph_nodes) {
+    // perform sanity check for arguments
+    auto num_inputs = input_nodes.size();
+    NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript.");
+    NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range.");
+
+    std::vector<int64_t> reduced_axes;
+    auto labels = ngraph::opset7::Einsum::extract_labels(input_subscripts[input_ind]);
+    std::string new_input_subscript = "";
+    for (size_t dim_ind = 0; dim_ind < labels.size(); ++dim_ind) {
+        const auto& label = labels[dim_ind];
+
+        // check if the current label is met in the other input subscripts
+        // or the output subscript
+        bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind});
+
+        // if label is not met, dimension corresponding to the label is to reduce
+        if (is_dim_reduced) {
+            reduced_axes.push_back(dim_ind);
+        } else {
+            new_input_subscript += label;
+        }
+    }
+
+    if (reduced_axes.size() == 0) {
+        // there is no axis to reduce
+        return;
+    }
+
+    // reduce by summed up elements along dimension for which label is met just once
+    const auto& input_node = input_nodes[input_ind];
+    auto axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {reduced_axes.size()}, reduced_axes);
+    auto reduce_sum = einsum_decompose_ptr->register_new_node<ngraph::opset7::ReduceSum>(input_node, axes_const, false);
+
+    // update a vector of inputs and input subscripts
+    input_nodes[input_ind] = reduce_sum->output(0);
+    input_subscripts[input_ind] = new_input_subscript;
+
+    // update a vector of nodes for copy_runtime_info
+    subgraph_nodes.insert(subgraph_nodes.end(), {axes_const, reduce_sum});
+}
+
+/// \brief      Contract two inputs of Einsum operation according to equation.
+/// The result of the contraction is appended into input_nodes along with its subscript.
+/// The input nodes for these two operands are removed from input_nodes along with their input
+/// subscripts
+///
+/// \param      einsum_decompose_ptr    A pointer to Einsum decomposing pass
+/// \param      input_nodes             A vector of input nodes to Einsum operation
+/// \param      input_subscripts        A vector of corresponding subscripts for the input nodes
+/// \param      output_subscript        The output subscript
+/// \param      input_ind1              An index of the first operand
+/// \param      input_ind2              An index of the second operand
+/// \param      subgraph_nodes          A vector of operation nodes that is included into a
+/// sub-graph decomposing Einsum that is needed for copy_runtime_info
+///
+void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr,
+    ngraph::OutputVector& input_nodes, std::vector<std::string>& input_subscripts,
+    const std::string& output_subscript, size_t input_ind1,
+    size_t input_ind2, ngraph::NodeVector& subgraph_nodes) {
+    // assume that input_ind1 < input_ind2 without loss of generality, otherwise, just swap them
+    if (input_ind2 < input_ind1) {
+        std::swap(input_ind1, input_ind2);
+    }
+
+    // perform sanity check for arguments
+    auto num_inputs = input_nodes.size();
+    NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript.");
+    NGRAPH_CHECK(input_ind2 < num_inputs && input_ind1 != input_ind2, "Incorrect input index is specified.");
+
+    const auto& input_node1 = input_nodes[input_ind1];
+    const auto& input_node2 = input_nodes[input_ind2];
+
+    // reduce dimensions for input operands if possible
+    reduce_input(einsum_decompose_ptr, input_nodes, input_subscripts, output_subscript, input_ind1, subgraph_nodes);
+    reduce_input(einsum_decompose_ptr, input_nodes, input_subscripts, output_subscript, input_ind2, subgraph_nodes);
+
+    // step 0. split dimensions of both operands into three groups:
+    // 1. dimension indices with the same labels (in both subscripts) that are NOT reduced -
+    // common labels (dimensions)
+    // 2. dimension indices with labels that are met only in one of two subscripts - separate
+    // labels (dimensions)
+    // 3. dimension indices with the same labels (in both subscripts) that are reduced - reduced
+    // labels (dimensions) NOTE: dimension is reduced iff. the corresponding label are met in
+    // neither the output subscript nor the input subscripts for other Einsum inputs excluding
+    // two given inputs
+    auto& input_subscript1 = input_subscripts[input_ind1];
+    auto labels1 = ngraph::opset7::Einsum::extract_labels(input_subscript1);
+    auto& input_subscript2 = input_subscripts[input_ind2];
+    auto labels2 = ngraph::opset7::Einsum::extract_labels(input_subscript2);
+    std::string common_part = "";
+    std::string separate_part1 = "";
+    std::string separate_part2 = "";
+    std::vector<int64_t> common_labels_inds1, common_labels_inds2;
+    std::vector<int64_t> separate_labels_inds1, separate_labels_inds2;
+    std::vector<int64_t> reduced_labels_inds1, reduced_labels_inds2;
+    for (size_t label_ind = 0; label_ind < labels1.size(); ++label_ind) {
+        const auto& label = labels1[label_ind];
+        auto iter = std::find(labels2.begin(), labels2.end(), label);
+        if (iter != labels2.end()) {
+            bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind1, input_ind2});
+            common_part += label;
+            if (is_dim_reduced) {
+                reduced_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+                reduced_labels_inds2.push_back(static_cast<int64_t>(iter - labels2.begin()));
+            } else {
+                common_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+                common_labels_inds2.push_back(static_cast<int64_t>(iter - labels2.begin()));
+            }
+        } else {
+            separate_part1 += label;
+            separate_labels_inds1.push_back(static_cast<int64_t>(label_ind));
+        }
+    }
+    for (size_t label_ind = 0; label_ind < labels2.size(); ++label_ind) {
+        const auto& label = labels2[label_ind];
+        auto iter = std::find(labels1.begin(), labels1.end(), label);
+        if (iter == labels1.end()) {
+            separate_part2 += label;
+            separate_labels_inds2.push_back(static_cast<int64_t>(label_ind));
+        }
+    }
+
+    // if there is no common dimension to reduce, apply eltwise multiplication
+    if (reduced_labels_inds1.empty()) {
+        std::string convenient_subscript = common_part + separate_part2;
+        std::string resultant_subscript = input_subscript1 + separate_part2;
+
+        // transpose the second operand in order to get the convenient layout
+        // for further unsqueezing
+        transpose_input(input_nodes, input_subscripts, convenient_subscript, input_ind2, subgraph_nodes);
+
+        // unsqueeze the first operand with new dimensions in the tail
+        // and the number of them is equal to the number of separate labels in the second
+        // subscript
+        int64_t unsqueeze_dim = labels1.size();
+        std::vector<int64_t> unsqueeze_axis1;
+        for (size_t label_ind = 0; label_ind < separate_labels_inds2.size(); ++label_ind) {
+            unsqueeze_axis1.push_back(unsqueeze_dim++);
+        }
+        const auto& unsqueeze_axis2 = separate_labels_inds1;
+
+        // unsqueeze input operands for elementwise-multiplication with broadcasting
+        auto unsqueeze_output1 = unsqueeze_input(input_node1, unsqueeze_axis1, subgraph_nodes);
+        auto unsqueeze_output2 = unsqueeze_input(input_node2, unsqueeze_axis2, subgraph_nodes);
+
+        // multiply both operands with broadcasting
+        auto mul = std::make_shared<ngraph::opset7::Multiply>(unsqueeze_output1, unsqueeze_output2, ngraph::op::AutoBroadcastSpec::NUMPY);
+
+        // update input operand and input subscript for Einsum operation
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, mul->output(0), resultant_subscript);
+
+        // update a vector of nodes for copy_runtime_info
+        subgraph_nodes.insert(subgraph_nodes.end(), {mul});
+        return;
+    }
+
+    // in this case a set of reduced labels is not empty and it can apply MatMul operation
+    // step 1. transpose both operands so that common labels, separated and reduced labels
+    // are grouped for both operands
+    bool is_separate_first1 = false;
+    auto int_subscript1 = generate_grouping_subscript(input_subscript1, common_labels_inds1, separate_labels_inds1,
+        reduced_labels_inds1, is_separate_first1);
+    transpose_input(input_nodes, input_subscripts, int_subscript1, input_ind1, subgraph_nodes);
+    bool is_separate_first2 = false;
+    auto int_subscript2 = generate_grouping_subscript(input_subscript2, common_labels_inds2, separate_labels_inds2,
+        reduced_labels_inds2, is_separate_first2);
+    transpose_input(input_nodes, input_subscripts, int_subscript2, input_ind2, subgraph_nodes);
+
+    // step 2. reshape both operands so that separate labels and reduced labels are represented
+    // with just one dimension this is needed by MatMul operation requirement to operands
+    // format. For example, the shape must be in a format [B1, ..., Bm, X1, Y] or [B1, ..., Bm,
+    // Y, X2], where B1, ..., Bm are common dimensions, X1 and X2 are collapsed dimensions
+    // for separate labels and Y is collapsed dimension for reduced labels
+    // this step is not needed for the operand if it satisfies to one of the requirements:
+    // 1. there is just one separate dimension and just one reduced dimension
+    // 2. there is no separate dimension, no common dimensions, and just one reduced dimension
+    bool no_reshape_for_matmul1 = (reduced_labels_inds1.size() == 1 && separate_labels_inds1.size() == 1) ||
+                                  (reduced_labels_inds1.size() == 1 && common_labels_inds1.size() == 0
+                                      && separate_labels_inds1.size() == 0);
+    bool no_reshape_for_matmul2 = (reduced_labels_inds2.size() == 1 && separate_labels_inds2.size() == 1) ||
+                                  (reduced_labels_inds2.size() == 1 && common_labels_inds2.size() == 0
+                                      && separate_labels_inds2.size() == 0);
+    // reshape back after MatMul is not needed if one of two requrements satisfies for both operands:
+    // 1. there is just one separate dimension
+    // 2. there is no separate dimension and no common dimensions present.
+    // If there is no separate dimension and common dimensions present, reshape is needed
+    // because auxiliary separate dimension has been added by Unsqueeze operation
+    // in the purpose for MatMul
+    bool no_reshape_back1 = (separate_labels_inds1.size() == 1) ||
+        (common_labels_inds1.size() == 0 && separate_labels_inds1.size() == 0);
+    bool no_reshape_back2 = (separate_labels_inds2.size() == 1) ||
+        (common_labels_inds2.size() == 0 && separate_labels_inds2.size() == 0);
+    bool no_reshape_after_matmul = no_reshape_back1 && no_reshape_back2;
+
+    auto matmul_operand1 = input_node1;
+    auto matmul_operand2 = input_node2;
+    int64_t common_dims_begin = 0;
+    int64_t common_dims_end = common_labels_inds1.size();
+    ngraph::OutputVector common_sub_shape, separate1_sub_shape, separate2_sub_shape;
+    if (no_reshape_for_matmul1 == false || no_reshape_for_matmul2 == false) {
+        auto data_shape1 = std::make_shared<ngraph::opset7::ShapeOf>(input_node1);
+        common_sub_shape = compute_sub_shape(data_shape1, common_dims_begin, common_dims_end, subgraph_nodes);
+        int64_t reduced_dims_begin = (is_separate_first1 ? common_labels_inds1.size() + separate_labels_inds1.size() : common_labels_inds1.size());
+        int64_t reduced_dims_end = reduced_dims_begin + reduced_labels_inds1.size();
+        auto reduced_sub_shape_prod = compute_sub_shape(data_shape1, reduced_dims_begin, reduced_dims_end, subgraph_nodes, true);
+
+        if (no_reshape_for_matmul1 == false || no_reshape_after_matmul == false) {
+            int64_t separate1_dims_begin = (is_separate_first1 ? common_labels_inds1.size() : common_labels_inds1.size() + reduced_labels_inds1.size());
+            int64_t separate1_dims_end = separate1_dims_begin + separate_labels_inds1.size();
+            separate1_sub_shape = compute_sub_shape(data_shape1, separate1_dims_begin, separate1_dims_end, subgraph_nodes);
+            matmul_operand1 = reshape_input_for_matmul(input_node1, common_sub_shape, separate1_sub_shape,
+                reduced_sub_shape_prod, is_separate_first1, subgraph_nodes);
+        }
+
+        if (no_reshape_for_matmul2 == false || no_reshape_after_matmul == false) {
+            auto data_shape2 = std::make_shared<ngraph::opset7::ShapeOf>(input_node2);
+            int64_t separate2_dims_begin = (is_separate_first2 ? common_labels_inds2.size() : common_labels_inds2.size() + reduced_labels_inds2.size());
+            int64_t separate2_dims_end = separate2_dims_begin + separate_labels_inds2.size();
+            separate2_sub_shape = compute_sub_shape(data_shape2, separate2_dims_begin, separate2_dims_end, subgraph_nodes);
+            matmul_operand2 = reshape_input_for_matmul(input_node2, common_sub_shape, separate2_sub_shape,
+                reduced_sub_shape_prod, is_separate_first2, subgraph_nodes);
+            subgraph_nodes.insert(subgraph_nodes.end(), {data_shape2});
+        }
+        subgraph_nodes.insert(subgraph_nodes.end(), {data_shape1});
+    }
+
+    // step 3. apply MatMul operation for formatted inputs
+    bool transpose_a = (is_separate_first1 ? false : true);
+    bool transpose_b = (is_separate_first2 ? true : false);
+    auto matmul = std::make_shared<ngraph::opset7::MatMul>(matmul_operand1, matmul_operand2, transpose_a, transpose_b);
+
+    // step 4. reshape back by unrolling dimensions corresponding to separate labels if needed
+    // now dimensions corresponding to reduced labels are reduced by the MatMul operation
+    std::string resultant_subscript = input_subscript1.substr(common_dims_begin, common_dims_end) + separate_part1 + separate_part2;
+    if (no_reshape_after_matmul) {
+        // this is a case when Reshape is not needed after MatMul operation
+        // since there are no collapsed (or auxiliary added) separated dimensions
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, matmul->output(0), resultant_subscript);
+    } else {
+        ngraph::OutputVector new_shape;
+        new_shape.insert(new_shape.end(), common_sub_shape.begin(), common_sub_shape.end());
+        new_shape.insert(new_shape.end(), separate1_sub_shape.begin(), separate1_sub_shape.end());
+        new_shape.insert(new_shape.end(), separate2_sub_shape.begin(), separate2_sub_shape.end());
+        auto result_shape_op = std::make_shared<ngraph::opset7::Concat>(new_shape, 0);
+
+        // if new shape is possible to compute on the shape infer stage, insert Constant node immediatelly
+        // in order to prevent repeated computing during constant-folding pass
+        std::shared_ptr<ngraph::opset7::Reshape> result_op;
+        if (auto new_shape_const = ngraph::get_constant_from_source(result_shape_op)) {
+            result_op = std::make_shared<ngraph::opset7::Reshape>(matmul->output(0), new_shape_const, false);
+            subgraph_nodes.insert(subgraph_nodes.end(), {new_shape_const});
+        } else {
+            result_op = std::make_shared<ngraph::opset7::Reshape>(matmul->output(0), result_shape_op->output(0), false);
+            subgraph_nodes.insert(subgraph_nodes.end(), {result_shape_op});
+        }
+
+        // update input operand and input subscript for Einsum operation
+        update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, result_op->output(0), resultant_subscript);
+        subgraph_nodes.insert(subgraph_nodes.end(), {result_op});
+    }
+
+    // update a vector of nodes for copy_runtime_info
+    subgraph_nodes.insert(subgraph_nodes.end(), {matmul});
+}
+}  // namespace
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::EinsumDecomposition, "EinsumDecomposition", 0);
+
+ngraph::pass::EinsumDecomposition::EinsumDecomposition() {
+    // NOTE: The transformation is applicable if Einsum equation does not contain ellipsis label
+    // and does not contain subscripts with repeated labels.
+    // For example, the transformation is applicable to Einsum with equation="abc,bd->ad"
+    // but not applicable to a case with equation="aabc,bd->ad" due to repeated labels
+    // in the first input subscript.
+    MATCHER_SCOPE(EinsumDecomposition);
+    auto einsum = ngraph::pattern::wrap_type<opset7::Einsum>();
+    ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
+        auto einsum_node = std::dynamic_pointer_cast<ngraph::opset7::Einsum>(m.get_match_root());
+        if (!einsum_node) {
+            return false;
+        }
+
+        auto equation = einsum_node->get_equation();
+        std::vector<std::string> input_subscripts;
+        std::string output_subscript;
+        ngraph::opset7::Einsum::parse_equation(equation, input_subscripts, output_subscript);
+
+        // check that the transformation is applicable
+        if (std::any_of(input_subscripts.cbegin(), input_subscripts.cend(), [](const std::string& subscript) {
+                return is_subscript_applicable(subscript) == false;
+            })) {
+            return false;
+        }
+
+        // create a list of input nodes with preserving their order
+        // and a vector of sub-graph nodes for copy_runtime_info
+        ngraph::OutputVector input_nodes = einsum_node->input_values();
+        ngraph::NodeVector subgraph_nodes;
+
+        // compute einsum path that is used to contract a pair of operands
+        // in more optimal order
+        auto einsum_path = compute_einsum_path(einsum_node);
+
+        // contract inputs by Einsum until just one is remained
+        for (auto const& inds_pair : einsum_path) {
+            contract_two_inputs(this, input_nodes, input_subscripts, output_subscript, inds_pair.first, inds_pair.second, subgraph_nodes);
+        }
+
+        // reduce dimensions for the remained input node
+        NGRAPH_CHECK(input_nodes.size() == 1);
+        reduce_input(this, input_nodes, input_subscripts, output_subscript, 0, subgraph_nodes);
+
+        // transpose dimensions to layout required by the output subscript
+        transpose_input(input_nodes, input_subscripts, output_subscript, 0, subgraph_nodes);
+
+        // replace the original Einsum node with the last node from decomposing sub-graph
+        // preserve the original node name
+        auto last_node = input_nodes[0].get_node_shared_ptr();
+        last_node->set_friendly_name(einsum_node->get_friendly_name());
+        ngraph::copy_runtime_info(einsum_node, subgraph_nodes);
+        ngraph::replace_node(einsum_node, last_node);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(einsum, matcher_name);
+    register_matcher(m, callback);
+}
diff --git a/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp
index 16da3327ac59b9..a0f5ca24c12f8d 100644
--- a/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp
+++ b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp
@@ -3,10 +3,14 @@
 //
 
 #include <string>
+
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
 #include "ngraph_reader_tests.hpp"
 
-TEST_F(NGraphReaderTests, ReadEinsumNetwork) {
-    std::string model = R"V0G0N(
+// since EinsumDecomposition is applied, disable these two tests
+// until ngraph_reader_test checks only correctness of IR reading
+TEST_F(NGraphReaderTests, DISABLED_ReadEinsumNetwork) {
+  std::string model = R"V0G0N(
 <net name="saved_model" version="10">
     <layers>
         <layer id="0" name="input_a" type="Parameter" version="opset1">
@@ -66,7 +70,7 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork) {
     </edges>
 </net>
 )V0G0N";
-    std::string modelV7 = R"V0G0N(
+  std::string modelV7 = R"V0G0N(
 <net name="saved_model" version="7">
     <layers>
         <layer id="0" name="input_a" type="Input" version="opset1">
@@ -115,11 +119,11 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork) {
     </edges>
 </net>
 )V0G0N";
-    compareIRs(model, modelV7);
+  compareIRs(model, modelV7);
 }
 
-TEST_F(NGraphReaderTests, ReadEinsumNetwork2) {
-    std::string model = R"V0G0N(
+TEST_F(NGraphReaderTests, DISABLED_ReadEinsumNetwork2) {
+  std::string model = R"V0G0N(
 <net name="saved_model" version="10">
     <layers>
         <layer id="0" name="input_a" type="Parameter" version="opset1">
@@ -199,7 +203,7 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork2) {
     </edges>
 </net>
 )V0G0N";
-    std::string modelV7 = R"V0G0N(
+  std::string modelV7 = R"V0G0N(
 <net name="saved_model" version="7">
     <layers>
         <layer id="0" name="input_a" type="Input" version="opset1">
@@ -266,6 +270,5 @@ TEST_F(NGraphReaderTests, ReadEinsumNetwork2) {
     </edges>
 </net>
 )V0G0N";
-    compareIRs(model, modelV7);
+  compareIRs(model, modelV7);
 }
-
diff --git a/ngraph/core/include/ngraph/op/einsum.hpp b/ngraph/core/include/ngraph/op/einsum.hpp
index 08f066823e9bed..37d1bf482a9b5c 100644
--- a/ngraph/core/include/ngraph/op/einsum.hpp
+++ b/ngraph/core/include/ngraph/op/einsum.hpp
@@ -38,6 +38,12 @@ namespace ngraph
                 std::shared_ptr<Node>
                     clone_with_new_inputs(const OutputVector& new_args) const override;
 
+                /// \brief      Get an equation of Einsum operation
+                ///
+                /// \return     Einsum equation
+                ///
+                std::string get_equation() const { return m_equation; }
+
                 /// \brief      Check correctness of equation format and extract input subscripts
                 /// and output subscript
                 ///

From 5db77bf9e6a17fb197cb32bfe9bdbe66cdb6273b Mon Sep 17 00:00:00 2001
From: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
Date: Tue, 18 May 2021 13:31:23 +0200
Subject: [PATCH 13/16] GNA padded2conv tests & fixes (#5589)

* add conversion of padded to valid convolution without other parameters change

* [GNA] Fix graph loop when multiple connections exist from single layer to concat

* [GNA] Add 1d and 2d conv test cases

Add models covering all transform scenarios.
Add test cases covering 1d and 2d convolutions.
Update transform with the newest code.
Add minor fixes in transform and elsewhere.

* [GNA] Remove debug code

* [GNA] Fixes after review

* [GNA] Fix failing tests

Co-authored-by: prozen <piotr.rozen@intel.com>
---
 .../src/gna_plugin/backend/am_intel_dnn.cpp   |   2 +-
 .../gna_plugin/backend/gna_limitations.cpp    |   2 +-
 .../src/gna_plugin/gna_graph_compiler.cpp     |   7 +-
 .../src/gna_plugin/gna_plugin.cpp             |   2 +
 .../gna_plugin/optimizer/gna_pass_manager.cpp |   2 +-
 .../convert_padded2valid_conv.hpp             |  26 ++
 .../convert_padded2valid_conv.cpp             | 372 ++++++++++++++++++
 .../gna/pass_tests/padded2valid_conv.cpp      | 308 +++++++++++++++
 8 files changed, 712 insertions(+), 9 deletions(-)
 create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp
 create mode 100644 inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp

diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index 728efcdd684687..f6bc926a35ada6 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -1784,7 +1784,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
                         || (component[i - 1].operation == kDnnConvolutional1dOp)
                         || (component[i - 1].operation == kDnnConvolutional2dOp)
                         || ((component[i - 1].operation == kDnnMaxPoolOp) &&
-                        (component[i - 2].operation == kDnnConvolutional1dOp))) {
+                        (component[i - 2].operation == kDnnConvolutional1dOp || component[i - 2].operation == kDnnConvolutional2dOp))) {
                         if (gnaOperation->Operands[PwlOpIdx] == nullptr) {
                             HelperGna2OperationSetOperand(gnaOperation, gnaUserAllocator, gnaUserFree, PwlOpIdx, createGna2TensorPwl(1, nullptr));
                         }
diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
index 98257eb3687939..cef6e26537a29c 100644
--- a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
+++ b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
@@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
 }
 
 std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
-    return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w);
+    return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
 }
 
 RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index b085dcef7f46c3..2dcac40afc6a65 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -1027,13 +1027,8 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
         auto layerInfo = LayerInfo(concatParent);
         // auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
         if (layerInfo.isInput()) {
-            auto & bytesAllocated = inputDesc->bytes_allocated_for_input[((InferenceEngine::CNNLayerPtr)layerInfo)->name];
-
             connectInput(layer, &concatLayerInfo.gna_ptr,
-                         concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
-
-            // TODO: currently connectInput api accept only total size, for concat we need extension for allocated, and actual sizes
-            bytesAllocated = inputLayer.tensorSize;
+                inputLayer.tensorSize, inputLayer.offset, idx, false);
 
             concatLayerInfo.input_allocated = true;
         } else if (layerInfo.isMemory()) {
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index 9832d59d527a70..1b1019767f242f 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -54,6 +54,7 @@
 #include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
 #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
 #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
+#include <transformations/op_conversions/convert_padded2valid_conv.hpp>
 
 #include "transformations/remove_extra_reshapes.hpp"
 
@@ -662,6 +663,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         manager.register_pass<ngraph::pass::InitNodeInfo>();
         // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
         manager.register_pass<ngraph::pass::ConvertPriorBox>();
+        manager.register_pass<ngraph::pass::ConvertPadded2ValidConv>();
         manager.register_pass<ngraph::pass::CommonOptimizations>();
         manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
         manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 52bc0d1d43fe63..35c9d2206a642e 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -1189,7 +1189,7 @@ void InsertConcatAligningFilterPass::run() {
                 getCreatorLayer(outData) = filterWithQuant;
                 filterWithQuant->outData.push_back(outData);
 
-                CNNNetworkInsertLayer(prevLayer, l, filterWithQuant);
+                CNNNetworkInsertLayer(prevLayer, l, filterWithQuant, invalid_data_idx, input_idx);
             }
             offset += outputSize;
         }
diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp
new file mode 100644
index 00000000000000..ef5983c1ee6cd3
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_padded2valid_conv.hpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <transformations_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+    class TRANSFORMATIONS_API ConvertPadded2ValidConv;
+
+}  // namespace pass
+}  // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief ConvertPadded2ValidConv transformation breaks down 2d conv into set of 1d conv.
+ */
+class ngraph::pass::ConvertPadded2ValidConv : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp
new file mode 100644
index 00000000000000..b5d1023a20d09e
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_padded2valid_conv.cpp
@@ -0,0 +1,372 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/convert_padded2valid_conv.hpp"
+
+#include <memory>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/builder/reshape.hpp>
+#include <ngraph/builder/split.hpp>
+#include <ngraph/rt_info.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include "itt.hpp"
+
+using namespace ngraph;
+using namespace op;
+
+static bool TransposeOrderMatches(std::shared_ptr<Transpose> transpose, std::vector<int64_t> order) {
+    if (!transpose)
+        return false;
+    const Output<Node>& transpose_order = transpose->input_value(1);
+    auto transpose_order_dim = transpose_order.get_shape().size();
+
+    if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size())
+        return false;
+
+    auto const_with_order_values = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_order.get_node_shared_ptr());
+    if (!const_with_order_values)
+        return false;
+
+    const int64_t* data = const_with_order_values->get_data_ptr<int64_t>();
+    if (!data)
+        return false;
+
+    for (size_t i = 0; i < order.size(); i++) {
+        if (order[i] != data[i])
+            return false;
+    }
+
+    return true;
+}
+
+static std::shared_ptr<opset1::StridedSlice> FlatCrop(Output<Node> input, size_t offset, size_t size) {
+    auto shape = input.get_shape();
+    if (shape.size() == 1) {
+        return std::make_shared<ngraph::opset1::StridedSlice>(
+            input, // data
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { offset }), // begin slice index
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { offset + size }), // end slice index
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 1 }), // strides
+            std::vector<int64_t>{0},  // begin mask
+            std::vector<int64_t>{0}); // end mask
+    } else if (shape.size() == 2) {
+        return std::make_shared<ngraph::opset1::StridedSlice>(
+            input, // data
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)0, offset }), // begin sice index
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)0, offset + size }), // end slice index
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { (size_t)1, (size_t)1 }), // strides
+            std::vector<int64_t>{1, 0},  // begin mask
+            std::vector<int64_t>{1, 0}); // end mask
+    }
+    return nullptr;
+}
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0);
+bool ngraph::pass::ConvertPadded2ValidConv::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    // Traverse nGraph Function in topological order
+    bool is_graph_modfied = false;
+    for (auto& node : f->get_ordered_ops()) {
+        auto conv = std::dynamic_pointer_cast<ngraph::opset1::Convolution> (node);
+        if (nullptr == conv || transformation_callback(conv)) {
+            continue;
+        }
+
+        const Output<Node>& input = conv->input_value(0);
+        const Output<Node>& filters = conv->input_value(1);
+        auto output_shape = conv->get_output_shape(0);
+        auto padding_type = conv->get_auto_pad();
+
+        // we support only 2D conv batch 1
+        if (input.get_shape().size() != 4 ||
+            filters.get_shape().size() != 4 ||
+            output_shape.size() != 4 ||
+            conv->get_dilations().size() != 2 ||
+            conv->get_strides().size() != 2 ||
+            input.get_shape()[0] != 1) {
+            continue;
+        }
+        // we are looking for Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC)
+        // so required network must be in NHWC order like in TF
+        //   supported cases:
+        //     - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC)
+        //     - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => Transpose(NCHW->NHWC)
+        //     - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPooling => Transpose(NCHW->NHWC) (2d max pool case)
+        //     - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => ActivationFunction => Transpose(NCHW->NHWC)
+        //     - Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPool => ActivationFunction => Transpose(NCHW->NHWC)
+        //     - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS (output of MO --disable_nhwc_to_nchw option)
+        //     - Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS => AF (output of MO --disable_nhwc_to_nchw option)
+        auto leading_transpose = std::dynamic_pointer_cast<Transpose>(input.get_node_shared_ptr());
+        if (!leading_transpose || !TransposeOrderMatches(leading_transpose, { 0, 3, 1, 2 }))
+            continue;
+
+        // check if convolution output port is connected with only one Op
+        auto output_0 = node->get_output_target_inputs(0);
+        if (output_0.size() != 1)
+            continue;
+
+        auto filter_values = std::dynamic_pointer_cast<ngraph::opset1::Constant>(filters.get_node_shared_ptr());
+        if (!filter_values) {
+            continue;
+        }
+        size_t input_channel_count = input.get_shape()[1];
+        size_t input_height = input.get_shape()[2];
+        size_t input_width = input.get_shape()[3];
+
+        size_t filter_count = filters.get_shape()[0];
+
+        size_t filter_height = filters.get_shape()[2];
+        size_t filter_width = filters.get_shape()[3];
+
+        auto output_0_node = output_0.begin()->get_node()->shared_from_this();
+        auto trailing_transpose = std::dynamic_pointer_cast<Transpose>(output_0_node);
+        auto conv_bias = std::dynamic_pointer_cast<ngraph::opset1::Add>(output_0_node);
+        auto max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(output_0_node);
+        auto af = std::dynamic_pointer_cast<ngraph::op::util::UnaryElementwiseArithmetic>(output_0_node);
+        std::shared_ptr<Node>last_op_in_sequence_for_replacement = trailing_transpose;
+
+        std::shared_ptr<ngraph::Node> bias_const;
+        if (leading_transpose && trailing_transpose && conv) {
+            auto trailing_transpose_output_0 = trailing_transpose->get_output_target_inputs(0);
+            if (trailing_transpose_output_0.size() == 1) {
+                auto trailing_transpose_output_0_node = trailing_transpose_output_0.begin()->get_node()->shared_from_this();
+                auto add_op = std::dynamic_pointer_cast<ngraph::opset1::Add>(trailing_transpose_output_0_node);
+                max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(trailing_transpose_output_0_node);
+                af = std::dynamic_pointer_cast<ngraph::op::util::UnaryElementwiseArithmetic>(trailing_transpose_output_0_node);
+                if (add_op) {
+                    auto add_const = std::dynamic_pointer_cast<ngraph::op::Constant>(add_op->input_value(1).get_node_shared_ptr());
+                    if (add_const) {
+                        auto bias_size = shape_size(add_const->get_shape());
+                        // the add maybe normal add not bias, than we just go further
+                        if (bias_size == filter_count) {
+                            conv_bias = add_op;
+                            last_op_in_sequence_for_replacement = add_op;
+
+                            auto bias_output_0 = add_op->get_output_target_inputs(0);
+                            if (bias_output_0.size() == 1) {
+                                auto bias_output_0_node = bias_output_0.begin()->get_node()->shared_from_this();
+                                max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(bias_output_0_node);
+                                af = std::dynamic_pointer_cast<ngraph::op::util::UnaryElementwiseArithmetic>(bias_output_0_node);
+                            }
+                        }
+                    }
+                }
+            }
+        } else if (!trailing_transpose && conv_bias) {
+            // the NCHW order
+            auto bias_output_0 = conv_bias->get_output_target_inputs(0);
+            if (bias_output_0.size() != 1)
+                continue;
+
+            auto bias_output_0_node = bias_output_0.begin()->get_node()->shared_from_this();
+            trailing_transpose = std::dynamic_pointer_cast<Transpose>(bias_output_0_node);
+            last_op_in_sequence_for_replacement = trailing_transpose;
+            max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(bias_output_0_node);
+            af = std::dynamic_pointer_cast<ngraph::op::util::UnaryElementwiseArithmetic>(bias_output_0_node);
+        }
+
+        if (max_pool) {
+            auto maxpool_output_0 = max_pool->get_output_target_inputs(0);
+            if (maxpool_output_0.size() != 1)
+                continue;
+            auto maxpool_output_0_node = maxpool_output_0.begin()->get_node()->shared_from_this();
+            // disable_nhwc_to_nchw option case
+            if (!trailing_transpose) {
+                trailing_transpose = std::dynamic_pointer_cast<Transpose>(maxpool_output_0_node);
+                last_op_in_sequence_for_replacement = trailing_transpose;
+            } else {
+                last_op_in_sequence_for_replacement = max_pool;
+            }
+            af = std::dynamic_pointer_cast<ngraph::op::util::UnaryElementwiseArithmetic>(maxpool_output_0_node);
+        }
+
+        //and finally activation function
+        if (af) {
+            auto af_output_0 = af->get_output_target_inputs(0);
+            if (af_output_0.size() != 1)
+                continue;
+            auto af_output_0_node = af_output_0.begin()->get_node()->shared_from_this();
+            if (!trailing_transpose) {
+                trailing_transpose = std::dynamic_pointer_cast<Transpose>(af_output_0_node);
+                last_op_in_sequence_for_replacement = trailing_transpose;
+            } else {
+                last_op_in_sequence_for_replacement = af;
+            }
+        }
+
+        if (!last_op_in_sequence_for_replacement || !trailing_transpose || !TransposeOrderMatches(trailing_transpose, { 0, 2, 3, 1 }))
+            continue;
+
+        size_t filter_dilation_x = conv->get_dilations()[1];
+        size_t filter_dilation_y = conv->get_dilations()[0];
+
+        size_t filter_stride_x = conv->get_strides()[1];
+        size_t filter_stride_y = conv->get_strides()[0];
+
+        // we are assuming VALID conv
+        size_t pads_begin_x = 0;
+        size_t pads_begin_y = 0;
+        size_t pads_end_x = 0;
+        size_t pads_end_y = 0;
+
+        size_t output_channel_count = filter_count;
+        size_t output_height = 0;
+        size_t output_width = 0;
+
+        switch (padding_type) {
+        case ngraph::op::PadType::EXPLICIT:
+            pads_begin_y = conv->get_pads_begin()[0];
+            pads_begin_x = conv->get_pads_begin()[1];
+            pads_end_y = conv->get_pads_end()[0];
+            pads_end_x = conv->get_pads_end()[1];
+            break;
+        case ngraph::op::PadType::VALID:
+            // all padding equal to 0 - already set
+            break;
+        case ngraph::op::PadType::SAME_LOWER:
+        case ngraph::op::PadType::SAME_UPPER:
+        {
+            output_height = output_shape[2];
+            output_width = output_shape[3];
+
+            size_t pad_begin_n_end_y = output_height * filter_stride_y + (filter_height)*filter_dilation_y - input_height - 1;
+            size_t pad_begin_n_end_x = output_width * filter_stride_x + (filter_width)*filter_dilation_x - input_width - 1;
+            pads_begin_y = (ngraph::op::PadType::SAME_LOWER == padding_type) ? (pad_begin_n_end_y >> 1) + (pad_begin_n_end_y & 1) : (pad_begin_n_end_y >> 1);
+            pads_end_y = (ngraph::op::PadType::SAME_UPPER == padding_type) ? (pad_begin_n_end_y >> 1) + (pad_begin_n_end_y & 1) : (pad_begin_n_end_y >> 1);
+            pads_begin_x = (ngraph::op::PadType::SAME_LOWER == padding_type) ? (pad_begin_n_end_x >> 1) + (pad_begin_n_end_x & 1) : (pad_begin_n_end_x >> 1);
+            pads_end_x = (ngraph::op::PadType::SAME_UPPER == padding_type) ? (pad_begin_n_end_x >> 1) + (pad_begin_n_end_x & 1) : (pad_begin_n_end_x >> 1);
+
+            break;
+        }
+        default:
+            break;
+        }
+        output_height = (input_height + pads_begin_y + pads_end_y - ((filter_height - 1) * filter_dilation_y + 1)) / filter_stride_y + 1;
+        output_width = (input_width + pads_begin_x + pads_end_x - ((filter_width - 1) * filter_dilation_x + 1)) / filter_stride_x + 1;
+
+        if (output_channel_count != output_shape[1] ||
+            output_height != output_shape[2] ||
+            output_width != output_shape[3]) {
+            continue;
+        }
+
+        // No padding - there is no need to decompose such convolution
+        if (pads_begin_y == 0 && pads_end_y == 0 && pads_begin_x == 0 && pads_end_x == 0)
+            continue;
+
+        // All checks applied - now we may start to do transformations
+
+        size_t flat_left_padding = input_channel_count * pads_begin_x;
+        size_t flat_right_padding = input_channel_count * pads_end_x;
+        size_t flat_top_padding = input_channel_count * (pads_begin_x + input_width + pads_end_x) * pads_begin_y;
+        size_t flat_bottom_padding = input_channel_count * (pads_begin_x + input_width + pads_end_x) * pads_end_y;
+        size_t biggest_padding = std::max(std::max(flat_left_padding, flat_right_padding), std::max(flat_top_padding, flat_bottom_padding));
+        size_t padded_row_size = input_channel_count * (pads_begin_x + input_width + pads_end_x);
+
+        if (input_height > 1 && (flat_top_padding > 1 || flat_bottom_padding > 1)) {
+            biggest_padding = biggest_padding > padded_row_size ? biggest_padding : padded_row_size;
+        }
+
+        auto flat_input = builder::opset1::reshape(
+            leading_transpose->input_value(0),
+            Shape{ (size_t)1, shape_size(leading_transpose->input_value(0).get_shape()) });
+        // zero padding
+        auto const_holding_padding = std::make_shared<opset1::Constant>(element::Type_t::f32, Shape{ 1, biggest_padding }, 0);
+
+        // padding
+        // padding
+        // ... row ...
+        // ... row ...
+        // ...........
+        // ... row ...
+        // padding
+        // padding
+
+        // Add top padding
+        OutputVector input_rows_to_concat;
+
+        // padding
+        for (size_t p = 0; p < pads_begin_y; p++) {
+            if (padded_row_size == biggest_padding) {
+                input_rows_to_concat.push_back(const_holding_padding);
+            } else {
+                auto slice = FlatCrop(const_holding_padding, 0, padded_row_size);
+                ngraph::copy_runtime_info(conv, slice);
+                input_rows_to_concat.push_back(slice);
+            }
+        }
+
+        // pad every row of input plan
+        for (size_t h = 0; h < input_height; h++) {
+            // left padding     input     right padding
+            //     |              |           |
+            //     +--------------+-----------+
+            //                    |
+            //                 concat
+
+            auto not_padded_row = input_height == 1 ?
+                flat_input :
+                FlatCrop(flat_input, h * input_width * input_channel_count, input_width * input_channel_count);
+            ngraph::copy_runtime_info(conv, not_padded_row);
+            if (flat_left_padding || flat_right_padding) {
+                OutputVector single_row_concat_inputs;
+                if (flat_left_padding) {
+                    if (flat_left_padding == biggest_padding) {
+                        single_row_concat_inputs.push_back(const_holding_padding);
+                    } else {
+                        auto slice = FlatCrop(const_holding_padding, 0, flat_left_padding);
+                        ngraph::copy_runtime_info(conv, slice);
+                        single_row_concat_inputs.push_back(slice);
+                    }
+                }
+                single_row_concat_inputs.push_back(not_padded_row);
+                if (flat_right_padding) {
+                    if (flat_right_padding == biggest_padding) {
+                        single_row_concat_inputs.push_back(const_holding_padding);
+                    } else {
+                        auto slice = FlatCrop(const_holding_padding, 0, flat_right_padding);
+                        ngraph::copy_runtime_info(conv, slice);
+                        single_row_concat_inputs.push_back(slice);
+                    }
+                }
+                auto padded_row_concat = std::make_shared<opset1::Concat>(single_row_concat_inputs, 1);
+                ngraph::copy_runtime_info(conv, padded_row_concat);
+                input_rows_to_concat.push_back(padded_row_concat);
+            } else {
+                input_rows_to_concat.push_back(not_padded_row);
+            }
+        }
+        // Bottom padding
+        for (size_t p = 0; p < pads_end_y; p++) {
+            if (padded_row_size == biggest_padding) {
+                input_rows_to_concat.push_back(const_holding_padding);
+            } else {
+                auto slice = FlatCrop(const_holding_padding, 0, padded_row_size);
+                ngraph::copy_runtime_info(conv, slice);
+                input_rows_to_concat.push_back(slice);
+            }
+        }
+        auto padded_input_plane = std::make_shared<opset1::Concat>(input_rows_to_concat, 1);
+        ngraph::copy_runtime_info(conv, padded_input_plane);
+
+        auto padded_input_plane_reshaped = builder::opset1::reshape(padded_input_plane,
+            Shape{ 1, pads_begin_y + input_height + pads_end_y, pads_begin_x + input_width + pads_end_x, input_channel_count });
+        //NHWC => NCHW
+        auto transposed2chw = builder::opset1::reorder_axes(padded_input_plane_reshaped, { 0, 3, 1, 2 });
+
+        auto conv_copy = std::make_shared<ngraph::opset1::Convolution>(
+            transposed2chw->output(0),
+            conv->input_value(1),
+            conv->get_strides(),
+            CoordinateDiff{ 0, 0 },
+            CoordinateDiff{ 0, 0 },
+            conv->get_dilations(),
+            PadType::EXPLICIT);
+
+        ngraph::replace_node(conv, conv_copy);
+
+        is_graph_modfied = true;
+    }
+    return is_graph_modfied;
+}
diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp
new file mode 100644
index 00000000000000..48f28491f35ae1
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/pass_tests/padded2valid_conv.cpp
@@ -0,0 +1,308 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include "transformations/init_node_info.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "../shared_tests_instances/skip_tests_check.hpp"
+
+using namespace ngraph;
+using namespace ngraph::opset1;
+
+namespace LayerTestsDefinitions {
+
+enum class modelType {
+    TranspConvTransp = 0,               /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) */
+    TranspConvBcastAddTransp,           /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => Transpose(NCHW->NHWC) */
+    TranspConvBcastAddMaxPoolTransp,    /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPooling => Transpose(NCHW->NHWC) (2d max pool case) */
+    TranspConvBcastAddActTransp,        /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => ActivationFunction => Transpose(NCHW->NHWC) */
+    TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => conv => broadcasted add (BIAS) => MaxPool => ActivationFunction => Transpose(NCHW->NHWC) */
+    TranspConvTranspBcastAdd,           /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS (output of MO --disable_nhwc_to_nchw option) */
+    TranspConvTranspBcastAddAct         /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => BIAS => AF (output of MO --disable_nhwc_to_nchw option) */
+};
+
+typedef std::tuple<
+    InferenceEngine::SizeVector,    // Kernel size
+    InferenceEngine::SizeVector,    // Strides
+    std::vector<ptrdiff_t>,         // Pad begin
+    std::vector<ptrdiff_t>,         // Pad end
+    InferenceEngine::SizeVector,    // Dilation
+    size_t,                         // Num out channels
+    op::PadType,                    // Padding type
+    InferenceEngine::SizeVector,    // Bias
+    InferenceEngine::SizeVector,    // Transposed Bias
+    InferenceEngine::SizeVector     // Maxpool
+> convSpecificParams;
+
+typedef std::tuple<
+    convSpecificParams,                 // Convolution parameters
+    InferenceEngine::Precision,         // Network Precision
+    std::string,                        // Target Device
+    std::map<std::string, std::string>, // Configuration
+    InferenceEngine::SizeVector,        // Input shapes
+    modelType                           // Test model
+> padded2ValidParams;
+
+class Padded2ValidConvTest : public testing::WithParamInterface<padded2ValidParams>,
+    virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<padded2ValidParams> obj) {
+        convSpecificParams convParams;
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        InferenceEngine::SizeVector inputShapes;
+        modelType model;
+        std::tie(convParams, netPrecision, targetDevice, configuration, inputShapes, model) = obj.param;
+        op::PadType padType;
+        InferenceEngine::SizeVector kernel, stride, dilation, bias, transpBias, maxpool;
+        std::vector<ptrdiff_t> padBegin, padEnd;
+        size_t convInput;
+        std::tie(kernel, stride, padBegin, padEnd, dilation, convInput, padType, bias, transpBias, maxpool) = convParams;
+
+        std::ostringstream result;
+        result << "M=" << static_cast<uint32_t>(model) << "_";
+        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+        result << "K" << CommonTestUtils::vec2str(kernel) << "_";
+        result << "S" << CommonTestUtils::vec2str(stride) << "_";
+        result << "PB" << CommonTestUtils::vec2str(padBegin) << "_";
+        result << "PE" << CommonTestUtils::vec2str(padEnd) << "_";
+        result << "D=" << CommonTestUtils::vec2str(dilation) << "_";
+        result << "O=" << convInput << "_";
+        result << "AP=" << padType << "_";
+        result << "B=" << CommonTestUtils::vec2str(bias) << "_";
+        result << "B=" << CommonTestUtils::vec2str(transpBias) << "_";
+        result << "MP=" << CommonTestUtils::vec2str(maxpool) << "_";
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        convSpecificParams convParams;
+        InferenceEngine::Precision netPrecision;
+        std::vector<size_t> inputShape;
+        modelType model;
+        std::tie(convParams, netPrecision, targetDevice, configuration, inputShape, model) = this->GetParam();
+        op::PadType padType;
+        InferenceEngine::SizeVector kernel, stride, dilation, bias, transpBias, maxpool;
+        std::vector<ptrdiff_t> padBegin, padEnd;
+        size_t numOutChannels;
+        std::tie(kernel, stride, padBegin, padEnd, dilation, numOutChannels, padType, bias, transpBias, maxpool) = convParams;
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        Shape bias_shape{ bias };
+        Shape transp_bias_shape{ transpBias };
+        Shape maxpool_shape{ maxpool };
+        std::vector<float> bias_weights{};
+
+        auto input = builder::makeParams(ngPrc, { inputShape });
+        auto transpose_in_order = op::Constant::create(element::i64, Shape{ 4 }, { 0, 3, 1, 2 });
+        auto transpose_in = std::make_shared<Transpose>(input[0], transpose_in_order);
+        auto filter_size = std::accumulate(std::begin(kernel), std::end(kernel), 1, std::multiplies<size_t>());
+        auto filter_weights = CommonTestUtils::generate_float_numbers(numOutChannels * inputShape[3] * filter_size, -0.5f, 0.5f);
+        auto conv = builder::makeConvolution(transpose_in, ngPrc, kernel, stride, padBegin,
+            padEnd, dilation, padType, numOutChannels, false, filter_weights);
+        auto transpose_out_order = op::Constant::create(element::i64, Shape{ 4 }, { 0, 2, 3, 1 });
+        auto bias_const = builder::makeConstant(ngPrc, bias_shape, bias_weights, true);
+        std::shared_ptr<Node> last_op = std::make_shared<Transpose>(conv, transpose_out_order);;
+
+        switch (model) {
+        case modelType::TranspConvBcastAddTransp:
+        {
+            auto bias = std::make_shared<Add>(conv, bias_const);
+            last_op = std::make_shared<Transpose>(bias, transpose_out_order);
+        }
+        break;
+
+        case modelType::TranspConvBcastAddMaxPoolTransp:
+        {
+            auto bcast_add = std::make_shared<Add>(conv, bias_const);
+            auto maxpool = std::make_shared<MaxPool>(bcast_add, Strides{ 1, 1 }, Shape{ 0, 0 }, Shape{ 0, 0 }, maxpool_shape);
+            last_op = std::make_shared<Transpose>(maxpool, transpose_out_order);
+        }
+        break;
+
+        case modelType::TranspConvBcastAddActTransp:
+        {
+            auto bcast_add = std::make_shared<Add>(conv, bias_const);
+            auto activation = std::make_shared<Relu>(bcast_add);
+            last_op = std::make_shared<Transpose>(activation, transpose_out_order);
+        }
+        break;
+
+        case modelType::TranspConvBcastAddMaxPoolActTransp:
+        {
+            auto bcast_add = std::make_shared<Add>(conv, bias_const);
+            auto max_pool = std::make_shared<MaxPool>(bcast_add, Strides{ 1, 1 }, Shape{ 0, 0 }, Shape{ 0, 0 }, maxpool_shape);
+            auto activation = std::make_shared<Relu>(max_pool);
+            last_op = std::make_shared<Transpose>(activation, transpose_out_order);
+        }
+        break;
+
+        case modelType::TranspConvTranspBcastAdd:
+        {
+            bias_const = std::make_shared<Constant>(ngPrc, transp_bias_shape);
+            last_op = std::make_shared<Add>(last_op, bias_const);
+        }
+        break;
+
+        case modelType::TranspConvTranspBcastAddAct:
+        {
+            bias_const = builder::makeConstant(ngPrc, transp_bias_shape, bias_weights, true);
+            auto bcast_add = std::make_shared<Add>(last_op, bias_const);
+            last_op = std::make_shared<Relu>(bcast_add);
+        }
+        break;
+
+        case modelType::TranspConvTransp:
+        default:
+            break;
+        }
+
+        function = std::make_shared<Function>(NodeVector{ last_op }, ParameterVector{ input });
+    }
+};
+
+class GnaPadded2Valid2DConvTest : public Padded2ValidConvTest, GnaLayerTestCheck {
+protected:
+    void Run() override {
+        GnaLayerTestCheck::SkipTestCheck();
+
+        if (!GnaLayerTestCheck::skipTest) {
+            Padded2ValidConvTest::Run();
+        }
+    }
+
+    void SetUp() override {
+        Padded2ValidConvTest::SetUp();
+    }
+};
+
+TEST_P(Padded2ValidConvTest, CompareWithRefs) {
+    Run();
+}
+
+TEST_P(GnaPadded2Valid2DConvTest, CompareWithRefs) {
+    Run();
+}
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    //TODO: some tests fail for FP16
+    //InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+        {"GNA_SCALE_FACTOR_0", "1"}
+    }
+};
+
+const std::vector<op::PadType> padTypes = {
+        op::PadType::EXPLICIT,
+        op::PadType::SAME_LOWER,
+        //TODO: SAME_UPPER fails for 1d conv
+        //op::PadType::SAME_UPPER,
+        op::PadType::VALID
+};
+
+const std::vector<modelType> models = {
+    modelType::TranspConvTransp,
+    modelType::TranspConvBcastAddTransp,
+    //TODO: this model fails for 1d conv
+    //modelType::TranspConvBcastAddMaxPoolTransp,
+    //TODO: disabled models fail with result comparison check
+    //modelType::TranspConvBcastAddActTransp,
+    //modelType::TranspConvBcastAddMaxPoolActTransp,
+    modelType::TranspConvTranspBcastAdd,
+    //modelType::TranspConvTranspBcastAddAct
+};
+
+const std::vector<std::vector<size_t>> input1DNHWC = { {1, 1, 16, 8} };
+const std::vector<std::vector<size_t >> kernels1D = { {1, 2}, {1, 3} //TODO: {1, 4} fails on result comparison for 1d conv
+};
+const std::vector<std::vector<size_t >> strides1D = { {1, 1} };
+const std::vector<std::vector<ptrdiff_t>> padBegins1D = { {0, 2} };
+const std::vector<std::vector<ptrdiff_t>> padEnds1D = { {0, 3} };
+const std::vector<std::vector<size_t >> dilations1D = { {1, 1} };
+const std::vector<size_t> numOutChannels1D = { 4 };
+const std::vector<std::vector<size_t >> biases1D = { {1, 4, 1, 1} };
+const std::vector<std::vector<size_t >> transp_biases1D = { {1, 1, 1, 4} };
+const std::vector<std::vector<size_t >> maxpools1D = { {1, 2} };
+
+const std::vector<std::vector<size_t>> input2DNHWC = { {1, 16, 16, 32} };
+const std::vector<std::vector<size_t >> kernels2D = { {2, 2}, {4, 1}, {1, 3} };
+//TODO: strides other than {1, 1} fail on result comparison for 2d conv
+const std::vector<std::vector<size_t >> strides2D = { {1, 1} };
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = { {1, 2} };
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = { {3, 1} };
+const std::vector<std::vector<size_t >> dilations2D = { {1, 1} };
+const std::vector<size_t> numOutChannels2D = { 32 };
+const std::vector<std::vector<size_t >> biases2D = { {1, 32, 1, 1} };
+const std::vector<std::vector<size_t >> transp_biases2D = { {1, 1, 1, 32} };
+const std::vector<std::vector<size_t >> maxpools2D = { {2, 2} };
+
+const auto conv1DParams = ::testing::Combine(
+    ::testing::ValuesIn(kernels1D),
+    ::testing::ValuesIn(strides1D),
+    ::testing::ValuesIn(padBegins1D),
+    ::testing::ValuesIn(padEnds1D),
+    ::testing::ValuesIn(dilations1D),
+    ::testing::ValuesIn(numOutChannels1D),
+    ::testing::ValuesIn(padTypes),
+    ::testing::ValuesIn(biases1D),
+    ::testing::ValuesIn(transp_biases1D),
+    ::testing::ValuesIn(maxpools1D)
+);
+
+const auto conv2DParams = ::testing::Combine(
+    ::testing::ValuesIn(kernels2D),
+    ::testing::ValuesIn(strides2D),
+    ::testing::ValuesIn(padBegins2D),
+    ::testing::ValuesIn(padEnds2D),
+    ::testing::ValuesIn(dilations2D),
+    ::testing::ValuesIn(numOutChannels2D),
+    ::testing::ValuesIn(padTypes),
+    ::testing::ValuesIn(biases2D),
+    ::testing::ValuesIn(transp_biases2D),
+    ::testing::ValuesIn(maxpools2D)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_1DTranspConvTransp, Padded2ValidConvTest,
+    ::testing::Combine(
+        conv1DParams,
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(input1DNHWC),
+        ::testing::ValuesIn(models)),
+    Padded2ValidConvTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_2DTranspConvTransp, GnaPadded2Valid2DConvTest,
+    ::testing::Combine(
+        conv2DParams,
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(input2DNHWC),
+        ::testing::ValuesIn(models)),
+    GnaPadded2Valid2DConvTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions

From 0face0e7cbbf52f849c86c36e509c5ccbdf05295 Mon Sep 17 00:00:00 2001
From: Krzysztof Bruniecki <krzysztof.bruniecki@intel.com>
Date: Tue, 18 May 2021 13:32:48 +0200
Subject: [PATCH 14/16] Use compile target in export tests for SUE (#5594)

---
 .../src/gna_plugin/gna_device.cpp             | 46 +++++++++++++------
 .../src/gna_plugin/gna_device.hpp             |  4 +-
 .../behavior/infer_request_config.cpp         |  4 +-
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index 01e0d7e80befb9..cbfc47f57aab0b 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -156,24 +156,42 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
 }
 
 bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
-    auto devVersion = getExecutionTargetDevice();
-    return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(devVersion);
+    const auto compileTargetDevice = getTargetDevice(false);
+    return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(compileTargetDevice);
 }
 
-Gna2DeviceVersion GNADeviceHelper::getExecutionTargetDevice() const {
+namespace {
     const volatile auto Gna2DeviceVersion3_0 = static_cast<Gna2DeviceVersion>(0x30);
-    if (executionTarget.empty()) {
-        if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
-            return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
-        return detectedGnaDevVersion;
-    } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
+} // namespace
+
+Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const bool execTarget) const {
+    auto parsed = Gna2DeviceVersion2_0;
+    auto throwUnsupportedGnaTarget = [&](std::string extraSuffix) {
+        auto key = execTarget ? InferenceEngine::GNAConfigParams::KEY_GNA_EXEC_TARGET : InferenceEngine::GNAConfigParams::KEY_GNA_COMPILE_TARGET;
+        THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix;
+    };
+    if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
         if (!isGnaLibVersion2_1)
-            THROW_GNA_EXCEPTION << "Unsupported GNA execution target " << executionTarget << " when GNA Library version is 2.0.X.Y";
-        return Gna2DeviceVersion3_0;
-    } else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
-        return Gna2DeviceVersion2_0;
+            throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y");
+        parsed = Gna2DeviceVersion3_0;
+    } else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
+        throwUnsupportedGnaTarget("");
     }
-    THROW_GNA_EXCEPTION << "Unknown execution target: \"" << executionTarget << "\"";
+    return parsed;
+}
+
+Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
+    if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
+        return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
+    return detectedGnaDevVersion;
+}
+
+Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const {
+    const auto declared = execTarget ? executionTarget : compileTarget;
+    if (declared.empty()) {
+        return execTarget ? getDefaultTarget() : getTargetDevice(true);
+    }
+    return parseDeclaredTarget(declared, execTarget);
 }
 
 uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
@@ -186,7 +204,7 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
     // (bit exactly) as on the selected GNA execution target generation.
     // See the GNA Plugin's GNA_EXEC_TARGET config option description.
     if (swExactMode) {
-        const auto consistentDevice = getExecutionTargetDevice();
+        const auto consistentDevice = getTargetDevice(true);
         status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
         checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast<long>(consistentDevice)) + ")");
     }
diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp
index 831b9fde517352..e032e5532dafc3 100644
--- a/inference-engine/src/gna_plugin/gna_device.hpp
+++ b/inference-engine/src/gna_plugin/gna_device.hpp
@@ -145,7 +145,6 @@ class GNADeviceHelper {
         return dev <= Gna2DeviceVersion2_0 && isGnaHw(dev);
     }
     bool enforceLegacyCnnNeeded() const;
-    Gna2DeviceVersion getExecutionTargetDevice() const;
     static void checkGna2Status(Gna2Status status, const std::string& from);
     static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel);
 #endif
@@ -197,6 +196,9 @@ class GNADeviceHelper {
     static const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string > operandTypes;
 
     static void enforceLegacyCnns(Gna2Model& gnaModel);
+    Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const;
+    Gna2DeviceVersion getDefaultTarget() const;
+    Gna2DeviceVersion getTargetDevice(bool execTarget) const;
 #endif
     void setOMPThreads(uint8_t const n_threads);
 
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp
index 2d502168f46d42..62d2a94ffa81da 100644
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/infer_request_config.cpp
@@ -19,7 +19,9 @@ namespace {
     const std::vector<std::map<std::string, std::string>> Inconfigs = {
             {{InferenceEngine::GNAConfigParams::KEY_GNA_SCALE_FACTOR, "1.0"}},
             {{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I8"}},
-            {{InferenceEngine::GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE, "gfile"}},
+            {{InferenceEngine::GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE, "gfile"},
+             {InferenceEngine::GNAConfigParams::KEY_GNA_EXEC_TARGET, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0},
+             {InferenceEngine::GNAConfigParams::KEY_GNA_COMPILE_TARGET, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0}},
             {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, InferenceEngine::GNAConfigParams::GNA_AUTO}},
             {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, InferenceEngine::GNAConfigParams::GNA_SW_FP32}},
             {{InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE, InferenceEngine::GNAConfigParams::GNA_SW}},

From 49a8714ee5c7470dbacdd7298ead77ba038f035a Mon Sep 17 00:00:00 2001
From: Vladislav Golubev <vladislav.golubev@intel.com>
Date: Tue, 18 May 2021 14:40:13 +0300
Subject: [PATCH 15/16] [CPU] FakeQuantize: new cases support (#5497)

---
 .../nodes/mkldnn_fake_quantize_node.cpp       | 26 +++++++++----------
 .../single_layer_tests/fake_quantize.cpp      | 23 ++++++++++++++++
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp
index 0a5ad38507bda5..b12bed6a47672b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp
@@ -10,6 +10,7 @@
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include "utils/general_utils.h"
+#include "utils/cpu_utils.hpp"
 
 #include <algorithm>
 #include <set>
@@ -841,7 +842,7 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ng
         }
         for (size_t i = 1; i < fq->get_input_size(); i++) {
             size_t count_not_unit_axis = 0;
-            auto shape = fq->get_input_shape(i);
+            auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), fq->get_input_shape(0).size());
 
             if (ngraph::shape_size(shape) != 1) {
                 size_t not_unit_axis = 0;
@@ -885,9 +886,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
         if (fq->get_output_size() != 1)
             IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size();
 
-        auto initAxisIdx = [&](size_t edgeIdx) {
-            const auto &inputDims = fq->get_input_shape(edgeIdx);
-
+        auto initAxisIdx = [&](const ngraph::Shape& inputDims) {
             size_t axisIdx = 0;
             for (int i = 1; i < inputDims.size(); i++) {
                 if (inputDims[i] > 1) {
@@ -898,35 +897,36 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
             return axisIdx;
         };
 
-        axis = fq->get_input_shape(0).size() == 1 ? 0 : 1;
+        const size_t dataNDims = fq->get_input_shape(0).size();
+        axis = dataNDims == 1 ? 0 : 1;
         int axisSize = -1;
 
-        auto inputLowAxis = initAxisIdx(1);
-        const auto ilShape = fq->get_input_shape(1);
+        const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataNDims);
+        auto inputLowAxis = initAxisIdx(ilShape);
         isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1);
         if (!isInputLowBroadcasted) {
             axis = inputLowAxis;
             axisSize = ilShape[inputLowAxis];
         }
 
-        auto inputHighAxis = initAxisIdx(2);
-        const auto ihShape = fq->get_input_shape(2);
+        const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataNDims);
+        auto inputHighAxis = initAxisIdx(ihShape);
         isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1);
         if (!isInputHighBroadcasted) {
             axis = inputHighAxis;
             axisSize = ihShape[inputHighAxis];
         }
 
-        auto outputLowAxis = initAxisIdx(3);
-        const auto olShape = fq->get_input_shape(3);
+        const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataNDims);
+        auto outputLowAxis = initAxisIdx(olShape);
         isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1);
         if (!isOutputLowBroadcasted) {
             axis = outputLowAxis;
             axisSize = olShape[outputLowAxis];
         }
 
-        auto outputHighAxis = initAxisIdx(4);
-        const auto ohShape = fq->get_input_shape(4);
+        const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataNDims);
+        auto outputHighAxis = initAxisIdx(ohShape);
         isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1);
         if (!isOutputHighBroadcasted) {
             axis = outputHighAxis;
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp
index fcaa41f72f09d3..3dbb6f5fd539b5 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp
@@ -89,4 +89,27 @@ INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizePerChannelAxis1, FakeQuantizeLayerTest
                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
                                 ::testing::Values(config)),
                         FakeQuantizeLayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> inputShapesPerChannel2D = {{1, 10}};
+const std::vector<std::vector<size_t>> constShapesPerChannel2D = { {10}, {1, 10}, {1} };
+const auto fqParamsPerChannel2D = ::testing::Combine(
+    ::testing::ValuesIn(levels),
+    ::testing::ValuesIn(constShapesPerChannel2D),
+    ::testing::Values(fqArgs),
+    ::testing::Values(inputParams)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizePerChannel2D, FakeQuantizeLayerTest,
+    ::testing::Combine(
+        fqParamsPerChannel2D,
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        ::testing::Values(InferenceEngine::Layout::ANY),
+        ::testing::Values(InferenceEngine::Layout::ANY),
+        ::testing::ValuesIn(inputShapesPerChannel2D),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(config)),
+    FakeQuantizeLayerTest::getTestCaseName);
+
 }  // namespace

From 21370c70b20576d5b8028a6d99b70eb6c861090f Mon Sep 17 00:00:00 2001
From: Dmitrii Khurtin <dmitrii.khurtin@intel.com>
Date: Tue, 18 May 2021 14:50:11 +0300
Subject: [PATCH 16/16] s/1191/1226 (#5636)

---
 inference-engine/cmake/dependencies.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake
index 0a0770f06ecfb0..4ce1ef3136550e 100644
--- a/inference-engine/cmake/dependencies.cmake
+++ b/inference-engine/cmake/dependencies.cmake
@@ -261,8 +261,8 @@ if (ENABLE_GNA)
             set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
         endif()
         if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
-            set(GNA_VERSION "02.00.00.1191.0")
-            set(GNA_HASH "a61b4a9133549b0a9f0b46d069f72906ced28bcbbe7d5c361e687645f53a1c8b")
+            set(GNA_VERSION "02.00.00.1226")
+            set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
         endif()
 
         set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)