From b731ce13d85fffd850e786333ac6d743178c6a6a Mon Sep 17 00:00:00 2001
From: Gleb Kazantaev
Date: Thu, 28 May 2020 16:45:48 +0300
Subject: [PATCH 01/24] Fixed NMSIE shape infer function (#648)

---
 .../transformations/src/ngraph_ops/nms_ie.cpp | 13 ++++++++---
 .../convert_nms_to_nms_ie.cpp                 | 15 +++++++++----
 .../convert_gather_to_gather_ie.cpp           |  2 ++
 .../convert_nms_to_nms_ie_test.cpp            | 22 +++++++++----------
 .../transformations/convert_topk_test.cpp     |  1 +
 5 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
index 9b7b7caf34215f..c7f696c075aa4e 100644
--- a/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
+++ b/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
@@ -34,11 +34,18 @@ std::shared_ptr<Node> op::NonMaxSuppressionIE::clone_with_new_inputs(const ngrap
 }
 
 void op::NonMaxSuppressionIE::validate_and_infer_types() {
+    auto squeeze_input = [](const Output<Node> & input) -> std::shared_ptr<Node> {
+        return std::make_shared<opset1::Squeeze>(input, opset1::Constant::create(element::i64, Shape{1}, {0}));
+    };
+
     // Calculate output shape using opset1::NonMaxSuppression
+    auto max_output_boxes_per_class = std::dynamic_pointer_cast<opset1::Constant>(input_value(2).get_node_shared_ptr());
     auto nms = std::make_shared<opset1::NonMaxSuppression>(input_value(0), input_value(1),
-            std::make_shared<opset1::Squeeze>(input_value(2), opset1::Constant::create(element::i64, Shape{1}, {0})),
-            std::make_shared<opset1::Squeeze>(input_value(3), opset1::Constant::create(element::i64, Shape{1}, {0})),
-            std::make_shared<opset1::Squeeze>(input_value(4), opset1::Constant::create(element::i64, Shape{1}, {0})));
+            /* the second input is used for output shape calculation; the output shape stays static only if this input is a Constant */
+            max_output_boxes_per_class ? opset1::Constant::create(element::i64, Shape{}, max_output_boxes_per_class->cast_vector<int64_t>()) :
+                                         squeeze_input(input_value(2)),
+            squeeze_input(input_value(3)),
+            squeeze_input(input_value(4)));
 
     set_output_type(0, nms->output(0).get_element_type(), nms->output(0).get_partial_shape());
 }
diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
index c50a393f098ef1..5b2e755249a556 100644
--- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
+++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
@@ -42,10 +42,17 @@ void ngraph::pass::ConvertNMSToNMSIE::convert_nms_to_nms_ie() {
 
         auto new_max_per_class = nms->input_value(2);
         if (max_output_boxes_per_class_rank.get_length() == 0) {
-            new_max_per_class = std::make_shared<opset1::Unsqueeze>(
-                    nms->input_value(2),
-                    opset1::Constant::create(element::i64, Shape{1}, {0}));
-            new_ops.push_back(new_max_per_class.get_node_shared_ptr());
+            // WA: the Constant has to be created manually here because it is required by the NMS shape
+            // inference; otherwise the output shape stays dynamic until the first CF pass is executed.
+            // This workaround can be removed once CF is executed right after the transformation and
+            // before the Validate pass.
+            if (auto new_max_per_class_const = std::dynamic_pointer_cast<opset1::Constant>(new_max_per_class.get_node_shared_ptr())) {
+                new_max_per_class = opset1::Constant::create(element::i64, Shape{1}, new_max_per_class_const->cast_vector<int64_t>());
+            } else {
+                new_max_per_class = std::make_shared<opset1::Unsqueeze>(
+                        nms->input_value(2),
+                        opset1::Constant::create(element::i64, Shape{1}, {0}));
+                new_ops.push_back(new_max_per_class.get_node_shared_ptr());
+            }
         }
         auto new_iou_threshold = nms->input_value(3);
         if (iou_threshold_rank.get_length() == 0) {
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
index e284ebf717e592..afd0ded67e84c8 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
@@ -33,6 +33,7 @@ TEST(TransformationTests, ConvertGatherToGatherIEStatic1) {
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertGatherToGatherIE().run_on_function(f);
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
@@ -60,6 +61,7 @@ TEST(TransformationTests, ConvertGatherToGatherIEStatic2) {
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertGatherToGatherIE().run_on_function(f);
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
index 860e916e8bb29e..bd19243e73c740 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ngraph_test_utils.hpp"
@@ -33,25 +34,26 @@ TEST(TransformationTests, ConvertNMSToNMSIEStatic) {
 
         f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
 
+        const auto & orig_shape = f->get_output_partial_shape(0);
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertNMSToNMSIE().run_on_function(f);
-        f->validate_nodes_and_infer_types();
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
         auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
 
         f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
     }
 
     auto res = compare_functions(f, f_ref);
@@ -80,11 +82,10 @@ TEST(TransformationTests, ConvertNMSToNMSIEDynamic1) {
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, PartialShape::dynamic());
         auto scores = std::make_shared<opset1::Parameter>(element::f32, PartialShape::dynamic());
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
@@ -118,11 +119,10 @@ TEST(TransformationTests, ConvertNMSToNMSIEDynamic2) {
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, PartialShape{DYN, 1000, 4});
         auto scores = std::make_shared<opset1::Parameter>(element::f32, PartialShape{DYN, 1, 1000});
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
 
     auto res = compare_functions(f, f_ref);
     ASSERT_TRUE(res.first) << res.second;
-}
\ No newline at end of file
+}
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
index c66d71929c443a..197661f2dc3f25 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
@@ -33,6 +33,7 @@ TEST(TransformationTests, ConvertTopKToTopKIEStatic) {
     ngraph::pass::ConvertTopKToTopKIE().run_on_function(f);
     ASSERT_NO_THROW(check_rt_info(f));
     ngraph::pass::ConstantFolding().run_on_function(f);
+    ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
 }
 
 {

From 23f41213bbe94c4135271ea39d6b2fe4c1a45efe Mon Sep 17 00:00:00 2001
From: Irina Efode
Date: Thu, 28 May 2020 17:22:19 +0300
Subject: [PATCH 02/24] [IE TESTS] MOVE plugin tests (#659)

---
 .../unit_test_utils/CMakeLists.txt            |  2 ++
 .../mocks}/mock_engine/CMakeLists.txt         |  0
 .../mocks}/mock_engine/dllmain.cpp            |  0
 .../mocks}/mock_engine/mock_plugin.cpp        |  0
.../mocks}/mock_engine/mock_plugin.hpp | 0 .../unit/inference_engine/CMakeLists.txt | 4 ++- .../cpp_interfaces/ie_plugin_test.cpp | 2 +- .../unit/inference_engine/ie_plugin_ptr.cpp} | 27 +++++++++++-------- .../tests_deprecated/CMakeLists.txt | 2 -- 9 files changed, 22 insertions(+), 15 deletions(-) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/CMakeLists.txt (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/dllmain.cpp (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/mock_plugin.cpp (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/mock_plugin.hpp (100%) rename inference-engine/{tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp => tests/unit/inference_engine/ie_plugin_ptr.cpp} (78%) diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt b/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt index 91076ff09ff43a..df611c700cc312 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt @@ -4,6 +4,8 @@ set(TARGET_NAME unitTestUtils) +add_subdirectory(mocks/mock_engine) + list(APPEND EXPORT_DEPENDENCIES commonTestUtils_s inference_engine_s diff --git a/inference-engine/tests_deprecated/mock_engine/CMakeLists.txt b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/CMakeLists.txt similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/CMakeLists.txt rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/CMakeLists.txt diff --git a/inference-engine/tests_deprecated/mock_engine/dllmain.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/dllmain.cpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/dllmain.cpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/dllmain.cpp diff --git a/inference-engine/tests_deprecated/mock_engine/mock_plugin.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/mock_plugin.cpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp diff --git a/inference-engine/tests_deprecated/mock_engine/mock_plugin.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/mock_plugin.hpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp diff --git a/inference-engine/tests/unit/inference_engine/CMakeLists.txt b/inference-engine/tests/unit/inference_engine/CMakeLists.txt index 629a2cecfbb901..3eb21a4349fb6f 100644 --- a/inference-engine/tests/unit/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/unit/inference_engine/CMakeLists.txt @@ -10,6 +10,8 @@ addIeTargetTest( LINK_LIBRARIES unitTestUtils ADD_CPPLINT + DEPENDENCIES + mock_engine LABELS IE -) \ No newline at end of file +) diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index 5e6224ac3e85d0..de0097b32cf4ea 100644 --- 
a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp
+++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp
@@ -13,7 +13,6 @@
 #include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
 #include "unit_test_utils/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp"
 #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp"
-#include "unit_test_utils/mocks/mock_iinfer_request.hpp"
 
 using namespace ::testing;
 using namespace std;
@@ -163,3 +162,4 @@ TEST_F(InferenceEnginePluginInternalTest, pluginInternalEraseMagicAndNameWhenImp
     ASSERT_EQ(mockExeNetworkInternal->exportString, mock_plugin_impl->importedString);
     mock_plugin_impl->importedString = {};
 }
+
diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
similarity index 78%
rename from inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp
rename to inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
index 2920d4b3f02180..a885def9322e85 100644
--- a/inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp
+++ b/inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
@@ -2,12 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "tests_common.hpp"
-
 #include 
+#include 
 
 #include "details/ie_so_loader.h"
-#include "../tests_deprecated/mock_engine/mock_plugin.hpp"
+#include "unit_test_utils/mocks/mock_engine/mock_plugin.hpp"
 #include "unit_test_utils/mocks/mock_error_listener.hpp"
 #include "unit_test_utils/mocks/mock_iinference_plugin.hpp"
 
@@ -19,20 +18,25 @@ using namespace InferenceEngine::details;
 
 IE_SUPPRESS_DEPRECATED_START
 
-class PluginTest: public TestsCommon {
+class PluginTest: public ::testing::Test {
 protected:
     unique_ptr<SharedObjectLoader> sharedObjectLoader;
     std::function<IInferencePlugin*(IInferencePlugin*)> createPluginEngineProxy;
-    InferenceEnginePluginPtr getPtr() ;
-    virtual void SetUp() {
+    InferenceEnginePluginPtr getPtr();
+
+    std::string get_mock_engine_name() {
+        std::string mockEngineName("mock_engine");
+        return CommonTestUtils::pre + mockEngineName + IE_BUILD_POSTFIX + CommonTestUtils::ext;
+    }
 
+    virtual void SetUp() {
         std::string libraryName = get_mock_engine_name();
         sharedObjectLoader.reset(new SharedObjectLoader(libraryName.c_str()));
         createPluginEngineProxy = make_std_function<IInferencePlugin*(IInferencePlugin*)>("CreatePluginEngineProxy");
     }
     template <class T>
     std::function<T> make_std_function(const std::string& functionName) {
-        std::function<T> ptr (reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
+        std::function<T> ptr(reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
         return ptr;
     }
 
@@ -43,7 +47,7 @@ class PluginTest: public TestsCommon {
 TEST_F(PluginTest, canCreatePlugin) {
     auto ptr = make_std_function<IInferencePlugin*(IInferencePlugin*)>("CreatePluginEngineProxy");
 
-    unique_ptr<IInferencePlugin, std::function<void (IInferencePlugin*)>> smart_ptr(ptr(nullptr), [](IInferencePlugin *p) {
+    unique_ptr<IInferencePlugin, std::function<void(IInferencePlugin*)>> smart_ptr(ptr(nullptr), [](IInferencePlugin *p) {
         p->Release();
     });
 
@@ -62,8 +66,7 @@ TEST_F(PluginTest, shouldThrowExceptionIfPluginNotExist) {
 
 ACTION_TEMPLATE(CallListenerWithErrorMessage,
                 HAS_1_TEMPLATE_PARAMS(int, k),
-                AND_1_VALUE_PARAMS(pointer))
-{
+                AND_1_VALUE_PARAMS(pointer)) {
     InferenceEngine::IErrorListener & data = ::std::get<k>(args);
     data.onError(pointer);
 }
 
@@ -71,7 +74,7 @@ ACTION_TEMPLATE(CallListenerWithErrorMessage,
 
 InferenceEnginePluginPtr PluginTest::getPtr() {
     InferenceEnginePluginPtr smart_ptr(get_mock_engine_name());
     return smart_ptr;
-};
+}
 
 TEST_F(PluginTest, canSetConfiguration) {
     InferenceEnginePluginPtr ptr = getPtr();
@@ -86,3 +89,5 @@ TEST_F(PluginTest, canSetConfiguration) {
 
     ASSERT_STREQ(reinterpret_cast<MockPlugin*>(*ptr)->config["key"].c_str(), "value");
 }
+
+IE_SUPPRESS_DEPRECATED_END
\ No newline at end of file
diff --git a/inference-engine/tests_deprecated/CMakeLists.txt b/inference-engine/tests_deprecated/CMakeLists.txt
index 5b3563e4ee0e78..66ad5012a317a2 100644
--- a/inference-engine/tests_deprecated/CMakeLists.txt
+++ b/inference-engine/tests_deprecated/CMakeLists.txt
@@ -8,8 +8,6 @@
 
 enable_testing()
 
-add_subdirectory(mock_engine)
-
 add_subdirectory(helpers)
 
 if (ENABLE_GAPI_TESTS)

From 77162bf8ee95f263e81907303e9e54cef6d4605b Mon Sep 17 00:00:00 2001
From: Andrew Bakalin
Date: Thu, 28 May 2020 18:01:56 +0300
Subject: [PATCH 03/24] [VPU][Tests] Fix sanitizer issue in unit tests (#630)

---
 .../tests/unit/vpu/base/graph_transformer_tests.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
index 80a3f6f8fd30f4..21283429ee01bc 100644
--- a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
+++ b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
@@ -243,6 +243,10 @@ bool checkExecutionOrder(const Model& model, const std::vector<int>& execOrder)
     auto it = execOrder.begin();
 
     for (const auto& stage : model->getStages()) {
+        if (it == execOrder.end()) {
+            return true;
+        }
+
         if (stage->id() == *it) {
             ++it;
         }

From 33aca7d2c48b91b19728305fffafa75ddf62bc38 Mon Sep 17 00:00:00 2001
From: Vladimir Gavrilov
Date: Thu, 28 May 2020 18:08:24 +0300
Subject: [PATCH 04/24] SplitConcatPairToInterpolate inserts Interpolate when
 input is 2D (#596)

* SplitConcatPairToInterpolate transformation was moved to the middle stage and is applied only to 4D and 5D inputs.
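The pattern this transformation matches is a Split whose output ports all feed a single Concat, each port consumed the same number of times and in order. A standalone NumPy sketch (illustrative only, not Model Optimizer code; the helper name is made up) shows why such a pair behaves like a nearest-neighbor Interpolate, assuming the Split cuts the axis into single-element slices:

import numpy as np

def split_concat(data, axis, num_splits, repeats):
    # Split along 'axis' into 'num_splits' chunks, then feed every chunk
    # to the Concat 'repeats' times, in order.
    chunks = np.split(data, num_splits, axis=axis)
    return np.concatenate([c for chunk in chunks for c in [chunk] * repeats], axis=axis)

# 5D input; axis 4 is split into single-element slices, each duplicated twice.
x = np.random.rand(1, 3, 100, 120, 150)
out = split_concat(x, axis=4, num_splits=150, repeats=2)

# Identical to nearest-neighbor upsampling with scale 2 along axis 4.
assert np.array_equal(out, np.repeat(x, 2, axis=4))
print(out.shape)  # (1, 3, 100, 120, 300)

Restricting the match to 4D/5D inputs (the change below) keeps the replacement within the spatial-upsampling cases that Interpolate supports.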
--- model-optimizer/automation/package_BOM.txt | 2 +- .../tf/SplitConcatPairToInterpolate_test.py | 412 ----------------- .../SplitConcatPairToInterpolate.py | 39 +- .../SplitConcatPairToInterpolate_test.py | 427 ++++++++++++++++++ 4 files changed, 452 insertions(+), 428 deletions(-) delete mode 100644 model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py rename model-optimizer/extensions/{front/tf => middle}/SplitConcatPairToInterpolate.py (81%) create mode 100644 model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 5b5143859094e3..e4da215ad3cda9 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -436,7 +436,6 @@ extensions/front/tf/sparse_segment_sum_ext.py extensions/front/tf/sparse_to_dense_ext.py extensions/front/tf/sparse_weighted_sum.py extensions/front/tf/split_ext.py -extensions/front/tf/SplitConcatPairToInterpolate.py extensions/front/tf/ssd_support.json extensions/front/tf/ssd_support_api_v1.14.json extensions/front/tf/ssd_support_api_v1.15.json @@ -568,6 +567,7 @@ extensions/middle/SliceConverter.py extensions/middle/SliceLikeToStridedSlice.py extensions/middle/space_to_depth.py extensions/middle/sparse_reshape.py +extensions/middle/SplitConcatPairToInterpolate.py extensions/middle/ssd_anchors_to_const.py extensions/middle/SwapAxesMiddleReplacer.py extensions/middle/TensorIterator_utils.py diff --git a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py b/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py deleted file mode 100644 index 6eb9e5fee41b82..00000000000000 --- a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py +++ /dev/null @@ -1,412 +0,0 @@ -""" - Copyright (c) 2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" - - -import unittest - -import numpy as np - -from extensions.front.tf.SplitConcatPairToInterpolate import SplitConcatPairToInterpolate -from mo.front.common.partial_infer.utils import int64_array -from mo.utils.ir_engine.compare_graphs import compare_graphs -from mo.utils.unittest.graph import build_graph - -graph_node_attrs_for_2d_spatial_case = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(3, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -graph_node_attrs_for_3d_spatial_case = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(4, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(4, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 4}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -graph_edges = [ - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'split', {'in': 0}), - ('split_axis_const', 'split_axis_const_data'), - ('split_axis_const_data', 'split', {'in': 1}), - ('split', 'split_data_0', {'out': 0}), - ('split', 'split_data_1', {'out': 1}), - ('split', 'split_data_2', {'out': 2}), - ('split_data_0', 'concat', {'in': 0}), - ('split_data_0', 'concat', {'in': 1}), - ('split_data_1', 'concat', {'in': 2}), - ('split_data_1', 'concat', {'in': 3}), - ('split_data_2', 'concat', {'in': 4}), - ('split_data_2', 'concat', {'in': 5}), - ('concat', 'concat_data'), - ('concat_data', 'abs'), - ('abs', 'abs_data'), - ('abs_data', 'output') - ] - - -ref_graph_edges = [ - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'interpolate', {'in': 0}), - ('placeholder_data', 'shape'), - ('shape', 'sslice', {'in': 0}), - ('slice_begin', 'sslice', {'in': 1}), - 
('slice_end', 'sslice', {'in': 2}), - ('sslice', 'sslice_data'), - ('scales', 'scales_data'), - ('sslice_data', 'mul', {'in': 0}), - ('scales_data', 'mul', {'in': 1}), - ('mul', 'mul_data'), - ('mul_data', 'interpolate', {'in': 1}), - ('interpolate', 'interpolate_data'), - ('interpolate_data', 'abs'), - ('abs', 'abs_data'), - ('abs_data', 'output'), - ] - - -ref_graph_node_attrs_for_2d_spatial_case_1 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([3]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([3]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([4])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - -ref_graph_node_attrs_for_2d_spatial_case_2 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([2]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([3])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -ref_graph_node_attrs_for_3d_spatial_case_1 = { - 'placeholder': {'type': 
'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([4]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([4]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -ref_graph_node_attrs_for_3d_spatial_case_2 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([3]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([4]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -class SplitConcatPairToInterpolateTest(unittest.TestCase): - def test_spatial_2d_split_concat_1(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_2d_spatial_case, - edges=graph_edges - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_1, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def 
test_spatial_2d_split_concat_2(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_2d_spatial_case, - edges=graph_edges, - update_attributes={ - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(2, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(2, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 2}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - } - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_2, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def test_spatial_3d_split_concat_1(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_3d_spatial_case, - edges=graph_edges - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_1, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def test_spatial_3d_split_concat_2(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_3d_spatial_case, - edges=graph_edges, - update_attributes={ - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(3, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - } - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_2, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py similarity index 81% rename from model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py rename to model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py index f46bb9255b7d5c..b55e2f106c16f2 100644 --- a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py +++ b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py @@ -20,8 +20,9 @@ from extensions.ops.elementwise import Mul from extensions.ops.interpolate import 
Interpolate from mo.front.common.partial_infer.utils import int64_array -from mo.front.common.replacement import FrontReplacementSubgraph +from mo.front.tf.graph_utils import create_op_with_const_inputs from mo.graph.graph import Graph, Node +from mo.middle.replacement import MiddleReplacementPattern from mo.ops.const import Const from mo.ops.shape import Shape from mo.ops.strided_slice import StridedSlice @@ -53,6 +54,9 @@ def get_concat_after_split(split: Node) -> Optional[Node]: def get_interpolate_pattern(split: Node) -> dict: + split_shape = split.in_port(0).data.get_shape() + if len(split_shape) not in {4, 5}: + return {} concat = get_concat_after_split(split) if concat is None: return {} @@ -79,19 +83,19 @@ def replace_interpolate_pattern(graph: Graph, match: dict): mul_node = Mul(graph, dict(name=split_node_name + '/Mul_')).create_node() scales_node.out_port(0).connect(mul_node.in_port(1)) - slice_begin = Const(graph, dict(name=split_node_name + '/slice_begin_', - value=int64_array([axis]))).create_node() - slice_end = Const(graph, dict(name=split_node_name + '/slice_end_', - value=int64_array([axis + 1]))).create_node() - - strided_slice_node = StridedSlice(graph, - {'name': split_node_name + '/StridedSlice_', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }).create_node([shape_node, slice_begin, slice_end]) + strided_slice_node = create_op_with_const_inputs(graph, + StridedSlice, + {1: int64_array([axis]), 2: int64_array([axis + 1])}, + { + 'name': split_node_name + '/StridedSlice_', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]) + }) + shape_node.out_port(0).connect(strided_slice_node.in_port(0)) + strided_slice_node.out_port(0).connect(mul_node.in_port(0)) interp_node = Interpolate(graph, dict(name=split_node_name + '/Interpolate_', @@ -106,7 +110,7 @@ def replace_interpolate_pattern(graph: Graph, match: dict): split_connection.get_source().connect(shape_node.in_port(0)) -class SplitConcatPairToInterpolate(FrontReplacementSubgraph): +class SplitConcatPairToInterpolate(MiddleReplacementPattern): """ This transformation looks for Interpolation layer implemented using simple operations, i.e. Split and Concat, and replaces found pattern with a sequence of Shape, StridedSlice, Const, Mul, Interpolate. @@ -146,6 +150,11 @@ class SplitConcatPairToInterpolate(FrontReplacementSubgraph): by number of output ports of 'split'. """ enabled = True + force_clean_up = True + + def run_before(self): + from extensions.middle.InterpolateSequenceToInterpolate import InterpolateSequenceToInterpolate + return [InterpolateSequenceToInterpolate] def find_and_replace_pattern(self, graph: Graph): log.debug('Enabled replacement of a pair of Split and Concat with Interpolate.') diff --git a/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py new file mode 100644 index 00000000000000..b7f4fac23b5f66 --- /dev/null +++ b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py @@ -0,0 +1,427 @@ +""" + Copyright (c) 2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + + +import unittest + +import numpy as np + +from extensions.middle.SplitConcatPairToInterpolate import SplitConcatPairToInterpolate +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph + +graph_node_attrs_for_2d_spatial_case = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(3, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +graph_node_attrs_for_3d_spatial_case = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(4, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(4, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 4}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, + } + + +graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'split', {'in': 0}), + ('split_axis_const', 'split_axis_const_data'), + ('split_axis_const_data', 'split', {'in': 1}), + ('split', 'split_data_0', {'out': 0}), + ('split', 'split_data_1', {'out': 1}), + ('split', 'split_data_2', {'out': 2}), + ('split_data_0', 'concat', {'in': 0}), + ('split_data_0', 'concat', {'in': 1}), + ('split_data_1', 'concat', {'in': 2}), + ('split_data_1', 'concat', {'in': 3}), + 
('split_data_2', 'concat', {'in': 4}), + ('split_data_2', 'concat', {'in': 5}), + ('concat', 'concat_data'), + ('concat_data', 'abs'), + ('abs', 'abs_data'), + ('abs_data', 'output') +] + + +ref_graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'interpolate', {'in': 0}), + ('placeholder_data', 'shape'), + ('shape', 'shape_data'), + ('shape_data', 'sslice', {'in': 0}), + ('slice_begin', 'slice_begin_data'), + ('slice_begin_data', 'sslice', {'in': 1}), + ('slice_end', 'slice_end_data'), + ('slice_end_data', 'sslice', {'in': 2}), + ('sslice', 'sslice_data'), + ('scales', 'scales_data'), + ('sslice_data', 'mul', {'in': 0}), + ('scales_data', 'mul', {'in': 1}), + ('mul', 'mul_data'), + ('mul_data', 'interpolate', {'in': 1}), + ('interpolate', 'interpolate_data'), + ('interpolate_data', 'abs'), + ('abs', 'abs_data'), + ('abs_data', 'output'), + ] + + +ref_graph_node_attrs_for_2d_spatial_case_1 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([3]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([3]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([4])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + +ref_graph_node_attrs_for_2d_spatial_case_2 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([2]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([3])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 
'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +ref_graph_node_attrs_for_3d_spatial_case_1 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([4]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([4]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +ref_graph_node_attrs_for_3d_spatial_case_2 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([3]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([4]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 
'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +class SplitConcatPairToInterpolateTest(unittest.TestCase): + def test_spatial_2d_split_concat_1(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_2d_spatial_case, + edges=graph_edges + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_1, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_2d_split_concat_2(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_2d_spatial_case, + edges=graph_edges, + update_attributes={ + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(2, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(2, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 2}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + } + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_2, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_3d_split_concat_1(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_3d_spatial_case, + edges=graph_edges + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_1, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_3d_split_concat_2(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_3d_spatial_case, + edges=graph_edges, + update_attributes={ + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(3, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, + 'split_data_0': {'value': None, 
'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + } + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_2, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) From bb41994f565eb74d4222ec8656b38e8ca2cccbdd Mon Sep 17 00:00:00 2001 From: Gleb Kazantaev Date: Thu, 28 May 2020 18:27:54 +0300 Subject: [PATCH 05/24] Removed StridedSlice to StridedSliceIE transformation (#661) --- .../src/convert_function_to_cnn_network.cpp | 2 - .../src/ie_cnn_layer_builder_ngraph.cpp | 10 +- .../include/ngraph_ops/strided_slice_ie.hpp | 51 ---------- .../convert_opset1_to_legacy_tbl.hpp | 1 - ...vert_strided_slice_to_strided_slice_ie.hpp | 37 ------- .../src/ngraph_ops/strided_slice_ie.cpp | 62 ------------ .../convert_opset1_to_legacy.cpp | 1 - ...vert_strided_slice_to_strided_slice_ie.cpp | 55 ----------- ...t_stridedslice_to_stridedslice_ie_test.cpp | 97 ------------------- 9 files changed, 1 insertion(+), 315 deletions(-) delete mode 100644 inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp delete mode 100644 inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp delete mode 100644 inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp delete mode 100644 inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp delete mode 100644 inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 5b2b30153912cc..22db9348541fb0 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -34,7 +34,6 @@ #include "ngraph_ops/nms_ie.hpp" #include "ngraph_ops/crop_ie.hpp" #include "ngraph_ops/selu_ie.hpp" -#include "ngraph_ops/strided_slice_ie.hpp" #include "ngraph_ops/rnn_cell_ie.hpp" #include "ngraph_ops/topk_ie.hpp" #include "generic_ie.hpp" @@ -555,7 +554,6 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p std::make_shared>(), std::make_shared>(), std::make_shared>(), - std::make_shared>(), std::make_shared>(), std::make_shared>(), std::make_shared>(), diff --git a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp index 67ab2ba8dd2e44..07e6adf3e08180 100644 --- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp +++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp @@ -42,7 +42,6 @@ #include "ngraph_ops/scaleshift.hpp" #include "ngraph_ops/tile_ie.hpp" #include "ngraph_ops/topk_ie.hpp" -#include "ngraph_ops/strided_slice_ie.hpp" #include "ngraph_ops/rnn_cell_ie.hpp" #include "ngraph_ops/hard_sigmoid_ie.hpp" #include 
"generic_ie.hpp" @@ -2114,17 +2113,10 @@ CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr template <> CNNLayer::Ptr NodeConverter::createLayer( const std::shared_ptr& layer) const { - THROW_IE_EXCEPTION << "StridedSlice operation has a form that is not supported." << layer->get_friendly_name() - << " should be converted to StridedSliceIE operation"; -} - -template <> -CNNLayer::Ptr NodeConverter::createLayer( - const std::shared_ptr& layer) const { LayerParams params = {layer->get_friendly_name(), "StridedSlice", details::convertPrecision(layer->get_output_element_type(0))}; auto res = std::make_shared(params); - auto castedLayer = std::dynamic_pointer_cast(layer); + auto castedLayer = std::dynamic_pointer_cast(layer); if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; std::string value; diff --git a/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp deleted file mode 100644 index cb83c4d5223deb..00000000000000 --- a/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include "ngraph/op/op.hpp" - -namespace ngraph { -namespace op { - -class INFERENCE_ENGINE_API_CLASS(StridedSliceIE) : public Op { -public: - static constexpr NodeTypeInfo type_info{"StridedSliceIE", 1}; - const NodeTypeInfo& get_type_info() const override { return type_info; } - - StridedSliceIE(const Output& data, - const Output& begin, - const Output& end, - const Output& strides, - const std::vector& begin_mask, - const std::vector& end_mask, - const std::vector& new_axis_mask, - const std::vector& shrink_axis_mask, - const std::vector& ellipsis_mask); - - void validate_and_infer_types() override; - - std::shared_ptr clone_with_new_inputs(const OutputVector & new_args) const override; - - const std::vector& get_begin_mask() const { return m_begin_mask; } - const std::vector& get_end_mask() const { return m_end_mask; } - const std::vector& get_new_axis_mask() const { return m_new_axis_mask; } - const std::vector& get_shrink_axis_mask() const { return m_shrink_axis_mask; } - const std::vector& get_ellipsis_mask() const { return m_ellipsis_mask; } - -protected: - const std::vector m_begin_mask; - const std::vector m_end_mask; - const std::vector m_new_axis_mask; - const std::vector m_shrink_axis_mask; - const std::vector m_ellipsis_mask; -}; - -} // namespace op -} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp index a0b619e81ba9f7..6b06d0dd7b3195 100644 --- a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp @@ -53,7 +53,6 @@ NGRAPH_PASS(ConvertHardSigmoidToHardSigmoidIE, ::ngraph::pass) NGRAPH_PASS(ConvertCellsToCellsIE, ::ngraph::pass) NGRAPH_PASS(ConvertInterpolateToInterpOrResample, ::ngraph::pass) NGRAPH_PASS(ConvertStridedSliceToCrop, ::ngraph::pass) -NGRAPH_PASS(ConvertStridedSliceToStridedSliceIE, ::ngraph::pass) 
NGRAPH_PASS(ConvertPowerToPowerIE, ::ngraph::pass) NGRAPH_PASS(ConvertSqrtToPowerIE, ::ngraph::pass) NGRAPH_PASS(ConvertPReLUToReLUIE, ::ngraph::pass) diff --git a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp deleted file mode 100644 index df2fd645696d89..00000000000000 --- a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include - -namespace ngraph { -namespace pass { - -class INFERENCE_ENGINE_API_CLASS(ConvertStridedSliceToStridedSliceIE); - -} // namespace pass -} // namespace ngraph - -/* - * Description: - * This transformation converts opset1::StridedSlice to legacy StridedSliceIE - * StridedSliceIE takes begin, end and strides inputs ony in i32 precision. - * Inputs with precision != i32 are converted with Convert operation. - */ - -class ngraph::pass::ConvertStridedSliceToStridedSliceIE: public ngraph::pass::GraphRewrite { -public: - ConvertStridedSliceToStridedSliceIE() : GraphRewrite() { - convert_strided_slice_to_strided_slice_ie(); - } - -private: - void convert_strided_slice_to_strided_slice_ie(); -}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp deleted file mode 100644 index 085d96513931c8..00000000000000 --- a/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ngraph_ops/strided_slice_ie.hpp" - -#include -#include -#include -#include -#include - -using namespace std; -using namespace ngraph; - -constexpr NodeTypeInfo op::StridedSliceIE::type_info; - -op::StridedSliceIE::StridedSliceIE(const Output &data, const Output &begin, const Output &end, - const Output &strides, const std::vector &begin_mask, - const std::vector &end_mask, const std::vector &new_axis_mask, - const std::vector &shrink_axis_mask, - const std::vector &ellipsis_mask) - : Op({data, begin, end, strides}) - , m_begin_mask(begin_mask) - , m_end_mask(end_mask) - , m_new_axis_mask(new_axis_mask) - , m_shrink_axis_mask(shrink_axis_mask) - , m_ellipsis_mask(ellipsis_mask) { - constructor_validate_and_infer_types(); -} - -std::shared_ptr op::StridedSliceIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { - check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), m_begin_mask, - m_end_mask, m_new_axis_mask, m_shrink_axis_mask, m_ellipsis_mask); -} - -void op::StridedSliceIE::validate_and_infer_types() { - const auto& begin_mask_et = input_value(1).get_element_type(); - const auto& end_mask_et = input_value(2).get_element_type(); - const auto& strides_et = input_value(3).get_element_type(); - - NODE_VALIDATION_CHECK(this, - begin_mask_et.is_integral_number(), - "Begin mask must have i32 type, but its: ", - begin_mask_et); - - NODE_VALIDATION_CHECK(this, - end_mask_et == element::i32, - "End mask must have i32 type, but its: ", - end_mask_et); - - NODE_VALIDATION_CHECK(this, - 
strides_et.is_integral_number(), - "Strides must have i32 type, but its: ", - strides_et); - - // Calculate output shape via opset1::StridedSlice operation - auto slice = std::make_shared(input_value(0), input_value(1), input_value(2), input_value(3), - m_begin_mask, m_end_mask, m_new_axis_mask, m_shrink_axis_mask, m_ellipsis_mask); - set_output_type(0, slice->output(0).get_element_type(), slice->output(0).get_partial_shape()); -} diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp index ac8a2c6fa888fd..2a23c392bb8b14 100644 --- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp +++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp @@ -44,7 +44,6 @@ #include #include #include -#include #include #include diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp deleted file mode 100644 index 48f0a9d287d803..00000000000000 --- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp" - -#include -#include - -#include - -#include -#include - -void ngraph::pass::ConvertStridedSliceToStridedSliceIE::convert_strided_slice_to_strided_slice_ie() { - auto slice = std::make_shared(element::f32, Shape{}, pattern::has_class()); - - ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) { - auto slice = std::dynamic_pointer_cast (m.get_match_root()); - if (!slice) { - return false; - } - - auto data_node = slice->input_value(0); - auto begin_node = std::dynamic_pointer_cast(slice->input_value(1).get_node_shared_ptr()); - auto end_node = std::dynamic_pointer_cast(slice->input_value(2).get_node_shared_ptr()); - auto stride_node = std::dynamic_pointer_cast(slice->input_value(3).get_node_shared_ptr()); - - if (!begin_node || !end_node || !stride_node) { - return false; - } - - auto converted_begin = std::make_shared(begin_node, element::i32); - auto converted_end = std::make_shared(end_node, element::i32); - auto converted_stride = std::make_shared(stride_node, element::i32); - - auto slice_ie = std::make_shared(data_node, - converted_begin, - converted_end, - converted_stride, - slice->get_begin_mask(), - slice->get_end_mask(), - slice->get_new_axis_mask(), - slice->get_shrink_axis_mask(), - slice->get_ellipsis_mask()); - slice_ie->set_friendly_name(slice->get_friendly_name()); - - ngraph::copy_runtime_info(slice, {converted_begin, converted_end, converted_stride, slice_ie}); - ngraph::replace_node(slice, slice_ie); - return true; - }; - - auto m = std::make_shared(slice, "ConvertStridedSliceToStridedSliceIE"); - this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE); -} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp 
b/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp deleted file mode 100644 index 41b58340f80c00..00000000000000 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "ngraph_test_utils.hpp" - -using namespace testing; - -TEST(TransformationTests, ConvertStridedSliceToStridedSliceIEStatic) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 3, 4}); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}; - std::vector end_mask{1, 1, 1, 1}; - - auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); - - f = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - ngraph::pass::InitNodeInfo().run_on_function(f); - ngraph::pass::ConvertStridedSliceToStridedSliceIE().run_on_function(f); - ASSERT_NO_THROW(check_rt_info(f)); - ngraph::pass::ConstantFolding().run_on_function(f); - } - - { - auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 3, 4}); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}, end_mask{1, 1, 1, 1}, new_axis_mask{}, shrink_axis_mask{}, ellipsis_mask{}; - - auto ss = std::make_shared(data, begin, end, stride, - begin_mask, end_mask, new_axis_mask, shrink_axis_mask, ellipsis_mask); - - f_ref = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - } - - auto res = compare_functions(f, f_ref); - ASSERT_TRUE(res.first) << res.second; -} - -TEST(TransformationTests, ConvertStridedSliceToStridedSliceIEDynamic) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(4)); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}; - std::vector end_mask{1, 1, 1, 1}; - - auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); - - f = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - ngraph::pass::InitNodeInfo().run_on_function(f); - ngraph::pass::ConvertStridedSliceToStridedSliceIE().run_on_function(f); - ASSERT_NO_THROW(check_rt_info(f)); - ngraph::pass::ConstantFolding().run_on_function(f); - } - - { - auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(4)); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {0, 0, 0, 0}); - auto 
end = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {-1, -1, -1, -1});
-        auto stride = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {1});
-
-        std::vector begin_mask{0, 0, 0, 0}, end_mask{1, 1, 1, 1}, new_axis_mask{}, shrink_axis_mask{}, ellipsis_mask{};
-
-        auto ss = std::make_shared(data, begin, end, stride,
-                                   begin_mask, end_mask, new_axis_mask, shrink_axis_mask, ellipsis_mask);
-
-        f_ref = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
-    }
-
-    auto res = compare_functions(f, f_ref);
-    ASSERT_TRUE(res.first) << res.second;
-}

From 5f6999ed7e2eb505f358c45e495f9a897e2490df Mon Sep 17 00:00:00 2001
From: Andrey Somsikov
Date: Thu, 28 May 2020 18:31:10 +0300
Subject: [PATCH 06/24] Remove Safety dependency (#627)

The Safety tool should be isolated from the environment it is validating:
https://github.com/pyupio/safety/security/advisories/GHSA-7q25-qrjw-6fg2
This change suggests a Docker-based solution by default.

---
 model-optimizer/README.md            | 2 +-
 model-optimizer/requirements_dev.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/model-optimizer/README.md b/model-optimizer/README.md
index f48d7f843f09b0..260149bee92039 100644
--- a/model-optimizer/README.md
+++ b/model-optimizer/README.md
@@ -192,7 +192,7 @@ of the tool and can not be applied to the current version of Model Optimizer.
 1. Run the following command:
-    safety check -r requirements_file
+    cat requirements_file | docker run -i --rm pyupio/safety safety check --stdin
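
   The invocation above streams the requirements file into a throwaway container, so Safety and its own
   dependencies stay isolated from the environment whose packages are being validated. For instance,
   auditing the TensorFlow requirements would look like this (an illustrative run of the same command,
   assuming the `pyupio/safety` image can be pulled from Docker Hub):

       cat requirements_tf.txt | docker run -i --rm pyupio/safety safety check --stdin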
 
> **NOTE**: here requirements_file is one of the following: requirements.txt, requirements_caffe.txt, requirements_tf.txt, requirements_mxnet.txt, requirements_dev.txt. diff --git a/model-optimizer/requirements_dev.txt b/model-optimizer/requirements_dev.txt index 38ecd7cfb0e71c..2123de2a031fd5 100644 --- a/model-optimizer/requirements_dev.txt +++ b/model-optimizer/requirements_dev.txt @@ -4,6 +4,5 @@ pyenchant==1.6.11 astroid==2.1.0 pylint==2.1.1 Sphinx==1.6.5 -safety==1.8.5 test-generator==0.1.1 defusedxml>=0.5.0 From e51e1682ca4d7ddb05844087e96ddd393f687829 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 28 May 2020 22:40:20 +0300 Subject: [PATCH 07/24] Enabled Unit tests and remove IReaderPtr (#653) * Enabled Unit tests and remove IReaderPtr * Fixed unicode tests for Windows * Fixed typo --- .../src/inference_engine/ie_core.cpp | 130 +----------- .../inference_engine/ie_network_reader.cpp | 193 ++++++++++++++++++ .../inference_engine/ie_network_reader.hpp | 33 +++ .../src/readers/reader_api/ie_reader_ptr.hpp | 36 ---- .../inference_engine/net_reader_test.cpp | 2 +- 5 files changed, 230 insertions(+), 164 deletions(-) create mode 100644 inference-engine/src/inference_engine/ie_network_reader.cpp create mode 100644 inference-engine/src/inference_engine/ie_network_reader.hpp delete mode 100644 inference-engine/src/readers/reader_api/ie_reader_ptr.hpp diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 0aab9219911f27..754e530359c4d7 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -5,33 +5,28 @@ #include "ie_core.hpp" #include -#include #include #include #include #include -#include -#include #include #include #include #include #include -#include "ie_blob_stream.hpp" -#include #include #include "cpp/ie_cnn_net_reader.h" #include "cpp/ie_plugin_cpp.hpp" #include "cpp_interfaces/base/ie_plugin_base.hpp" #include "details/ie_exception_conversion.hpp" #include "details/ie_so_pointer.hpp" -#include "file_utils.h" #include "ie_icore.hpp" #include "ie_plugin.hpp" #include "ie_plugin_config.hpp" #include "ie_profiling.hpp" #include "ie_util_internal.hpp" +#include "ie_network_reader.hpp" #include "multi-device/multi_device_config.hpp" #include "xml_parse_utils.h" @@ -133,79 +128,6 @@ Parameter copyParameterValue(const Parameter & value) { } // namespace -class Reader: public IReader { -private: - InferenceEngine::IReaderPtr ptr; - std::once_flag readFlag; - std::string name; - std::string location; - - InferenceEngine::IReaderPtr getReaderPtr() { - std::call_once(readFlag, [&] () { - FileUtils::FilePath libraryName = FileUtils::toFilePath(location); - FileUtils::FilePath readersLibraryPath = FileUtils::makeSharedLibraryName(getInferenceEngineLibraryPath(), libraryName); - - if (!FileUtils::fileExist(readersLibraryPath)) { - THROW_IE_EXCEPTION << "Please, make sure that Inference Engine ONNX reader library " - << FileUtils::fromFilePath(::FileUtils::makeSharedLibraryName({}, libraryName)) << " is in " - << getIELibraryPath(); - } - ptr = IReaderPtr(readersLibraryPath); - }); - - return ptr; - } - - InferenceEngine::IReaderPtr getReaderPtr() const { - return const_cast(this)->getReaderPtr(); - } - - void Release() noexcept override { - delete this; - } - -public: - using Ptr = std::shared_ptr; - Reader(const std::string& name, const std::string location): name(name), location(location) {} - bool supportModel(std::istream& model) const override { - auto 
reader = getReaderPtr(); - return reader->supportModel(model); - } - CNNNetwork read(std::istream& model, const std::vector& exts) const override { - auto reader = getReaderPtr(); - return reader->read(model, exts); - } - CNNNetwork read(std::istream& model, std::istream& weights, const std::vector& exts) const override { - auto reader = getReaderPtr(); - return reader->read(model, weights, exts); - } - std::vector getDataFileExtensions() const override { - auto reader = getReaderPtr(); - return reader->getDataFileExtensions(); - } - std::string getName() const { - return name; - } -}; - -namespace { - -// Extension to plugins creator -std::multimap readers; - -void registerReaders() { - static std::mutex readerMutex; - std::lock_guard lock(readerMutex); - // TODO: Read readers info from XML - auto onnxReader = std::make_shared("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); - readers.emplace("onnx", onnxReader); - readers.emplace("prototxt", onnxReader); - auto irReader = std::make_shared("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); - readers.emplace("xml", irReader); -} - -} // namespace - CNNNetReaderPtr CreateCNNNetReaderPtr() noexcept { auto loader = createCnnReaderLoader(); return CNNNetReaderPtr(loader); @@ -374,57 +296,12 @@ class Core::Impl : public ICore { CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork) - - std::ifstream modelStream(modelPath, std::ios::binary); - if (!modelStream.is_open()) - THROW_IE_EXCEPTION << "Model file " << modelPath << " cannot be opened!"; - - auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); - for (auto it = readers.lower_bound(fileExt); it != readers.upper_bound(fileExt); it++) { - auto reader = it->second; - if (reader->supportModel(modelStream)) { - // Find weights - std::string bPath = binPath; - if (bPath.empty()) { - auto pathWoExt = modelPath; - auto pos = modelPath.rfind('.'); - if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos); - for (const auto& ext : reader->getDataFileExtensions()) { - bPath = pathWoExt + "." + ext; - if (!FileUtils::fileExist(bPath)) { - bPath.clear(); - } else { - break; - } - } - } - if (!bPath.empty()) { - std::ifstream binStream; - binStream.open(bPath, std::ios::binary); - if (!binStream.is_open()) - THROW_IE_EXCEPTION << "Weights file " << bPath << " cannot be opened!"; - return reader->read(modelStream, binStream, extensions); - } - return reader->read(modelStream, extensions); - } - } - THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model: " << modelPath; + return details::ReadNetwork(modelPath, binPath, extensions); } CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override { IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork) - std::istringstream modelStream(model); - details::BlobStream binStream(weights); - - for (auto it = readers.begin(); it != readers.end(); it++) { - auto reader = it->second; - if (reader->supportModel(modelStream)) { - if (weights) - return reader->read(modelStream, binStream, extensions); - return reader->read(modelStream, extensions); - } - } - THROW_IE_EXCEPTION << "Unknown model format! 
Cannot read the model from string!"; + return details::ReadNetwork(model, weights, extensions); } ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName, @@ -704,7 +581,6 @@ Core::Impl::Impl() { opsetNames.insert("opset1"); opsetNames.insert("opset2"); opsetNames.insert("opset3"); - registerReaders(); } Core::Impl::~Impl() {} diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp new file mode 100644 index 00000000000000..9d739b6afb3c46 --- /dev/null +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -0,0 +1,193 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ie_network_reader.hpp" + +#include
+#include +#include +#include +#include + +#include +#include +#include +#include + +namespace InferenceEngine { + +namespace details { + +/** + * @brief This class defines the name of the fabric for creating an IReader object in DLL + */ +template <> +class SOCreatorTrait { +public: + /** + * @brief A name of the fabric for creating IReader object in DLL + */ + static constexpr auto name = "CreateReader"; +}; + +} // namespace details + +/** + * @brief This class is a wrapper for reader interfaces + */ +class Reader: public IReader { +private: + InferenceEngine::details::SOPointer ptr; + std::once_flag readFlag; + std::string name; + std::string location; + + InferenceEngine::details::SOPointer getReaderPtr() { + std::call_once(readFlag, [&] () { + FileUtils::FilePath libraryName = FileUtils::toFilePath(location); + FileUtils::FilePath readersLibraryPath = FileUtils::makeSharedLibraryName(getInferenceEngineLibraryPath(), libraryName); + + if (!FileUtils::fileExist(readersLibraryPath)) { + THROW_IE_EXCEPTION << "Please, make sure that Inference Engine ONNX reader library " + << FileUtils::fromFilePath(::FileUtils::makeSharedLibraryName({}, libraryName)) << " is in " + << getIELibraryPath(); + } + ptr = InferenceEngine::details::SOPointer(readersLibraryPath); + }); + + return ptr; + } + + InferenceEngine::details::SOPointer getReaderPtr() const { + return const_cast(this)->getReaderPtr(); + } + + void Release() noexcept override { + delete this; + } + +public: + using Ptr = std::shared_ptr; + Reader(const std::string& name, const std::string location): name(name), location(location) {} + bool supportModel(std::istream& model) const override { + auto reader = getReaderPtr(); + return reader->supportModel(model); + } + CNNNetwork read(std::istream& model, const std::vector& exts) const override { + auto reader = getReaderPtr(); + return reader->read(model, exts); + } + CNNNetwork read(std::istream& model, std::istream& weights, const std::vector& exts) const override { + auto reader = getReaderPtr(); + return reader->read(model, weights, exts); + } + std::vector getDataFileExtensions() const override { + auto reader = getReaderPtr(); + return reader->getDataFileExtensions(); + } + std::string getName() const { + return name; + } +}; + +namespace { + +// Extension to plugins creator +std::multimap readers; + +void registerReaders() { + IE_PROFILING_AUTO_SCOPE(details::registerReaders) + static bool initialized = false; + static std::mutex readerMutex; + std::lock_guard lock(readerMutex); + if (initialized) return; + // TODO: Read readers info from XML + auto onnxReader = std::make_shared("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); + readers.emplace("onnx", onnxReader); + readers.emplace("prototxt", onnxReader); + auto irReader = std::make_shared("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); + readers.emplace("xml", irReader); + initialized = true; +} + +} // namespace + +CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts) { + IE_PROFILING_AUTO_SCOPE(details::ReadNetwork) + // Register readers if it is needed + registerReaders(); + + // Fix unicode name +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring model_path = InferenceEngine::details::multiByteCharToWString(modelPath.c_str()); +#else + std::string model_path = modelPath; +#endif + // Try to open model file + std::ifstream modelStream(model_path, std::ios::binary); + if 
(!modelStream.is_open()) + THROW_IE_EXCEPTION << "Model file " << modelPath << " cannot be opened!"; + + // Find reader for model extension + auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); + for (auto it = readers.lower_bound(fileExt); it != readers.upper_bound(fileExt); it++) { + auto reader = it->second; + // Check that reader supports the model + if (reader->supportModel(modelStream)) { + // Find weights + std::string bPath = binPath; + if (bPath.empty()) { + auto pathWoExt = modelPath; + auto pos = modelPath.rfind('.'); + if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos); + for (const auto& ext : reader->getDataFileExtensions()) { + bPath = pathWoExt + "." + ext; + if (!FileUtils::fileExist(bPath)) { + bPath.clear(); + } else { + break; + } + } + } + if (!bPath.empty()) { + // Open weights file +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring weights_path = InferenceEngine::details::multiByteCharToWString(bPath.c_str()); +#else + std::string weights_path = bPath; +#endif + std::ifstream binStream; + binStream.open(weights_path, std::ios::binary); + if (!binStream.is_open()) + THROW_IE_EXCEPTION << "Weights file " << bPath << " cannot be opened!"; + + // read model with weights + return reader->read(modelStream, binStream, exts); + } + // read model without weights + return reader->read(modelStream, exts); + } + } + THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model: " << modelPath; +} + +CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { + IE_PROFILING_AUTO_SCOPE(details::ReadNetwork) + // Register readers if it is needed + registerReaders(); + std::istringstream modelStream(model); + details::BlobStream binStream(weights); + + for (auto it = readers.begin(); it != readers.end(); it++) { + auto reader = it->second; + if (reader->supportModel(modelStream)) { + if (weights) + return reader->read(modelStream, binStream, exts); + return reader->read(modelStream, exts); + } + } + THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model from string!"; +} + +} // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/ie_network_reader.hpp b/inference-engine/src/inference_engine/ie_network_reader.hpp new file mode 100644 index 00000000000000..2d8ea6338fc4da --- /dev/null +++ b/inference-engine/src/inference_engine/ie_network_reader.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace InferenceEngine { +namespace details { + +/** + * @brief Reads IR xml and bin files + * @param modelPath path to IR file + * @param binPath path to bin file, if path is empty, will try to read bin file with the same name as xml and + * if bin file with the same name was not found, will load IR without weights. 
+ * @param exts vector with extensions + * @return CNNNetwork + */ +CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts); +/** + * @brief Reads IR xml and bin (with the same name) files + * @param model string with IR + * @param weights shared pointer to constant blob with weights + * @param exts vector with extensions + * @return CNNNetwork + */ +CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts); + +} // namespace details +} // namespace InferenceEngine diff --git a/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp b/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp deleted file mode 100644 index 9c3aee3ac51249..00000000000000 --- a/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include
-#include "ie_reader.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-/**
- * @brief This class defines the name of the fabric for creating an IReader object in DLL
- */
-template <>
-class SOCreatorTrait {
-public:
-    /**
-     * @brief A name of the fabric for creating IReader object in DLL
-     */
-    static constexpr auto name = "CreateReader";
-};
-
-}  // namespace details
-
-/**
- * @brief A C++ helper to work with objects created by the plugin.
- *
- * Implements different interfaces.
- */
-using IReaderPtr = InferenceEngine::details::SOPointer;
-
-}  // namespace InferenceEngine
diff --git a/inference-engine/tests/functional/inference_engine/net_reader_test.cpp b/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
index 4df6db84f56944..2d008bbd905049 100644
--- a/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
@@ -107,7 +107,7 @@ TEST_P(NetReaderTest, ReadNetworkTwiceSeparately) {
 
 #ifdef ENABLE_UNICODE_PATH_SUPPORT
 
-TEST_P(NetReaderTest, DISABLED_ReadCorrectModelWithWeightsUnicodePath) {
+TEST_P(NetReaderTest, ReadCorrectModelWithWeightsUnicodePath) {
     GTEST_COUT << "params.modelPath: '" << _modelPath << "'" << std::endl;
     GTEST_COUT << "params.weightsPath: '" << _weightsPath << "'" << std::endl;
     GTEST_COUT << "params.netPrc: '" << _netPrc.name() << "'" << std::endl;

From 5cc8114322d7fcd8057a80a3229a9bb16276fa70 Mon Sep 17 00:00:00 2001
From: Evgenya Stepyreva
Date: Fri, 29 May 2020 09:11:22 +0300
Subject: [PATCH 08/24] [ MO: CVS-32286 ] IdentityN fix (#668)

---
 .../front/tf/identityN_to_identity.py      | 15 ++++++++++++++-
 .../front/tf/identityN_to_identity_test.py | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/model-optimizer/extensions/front/tf/identityN_to_identity.py b/model-optimizer/extensions/front/tf/identityN_to_identity.py
index 4e3d38ff0bf089..7578ef97d8d8da 100644
--- a/model-optimizer/extensions/front/tf/identityN_to_identity.py
+++ b/model-optimizer/extensions/front/tf/identityN_to_identity.py
@@ -29,6 +29,11 @@ class IdentityN_to_Identity(FrontReplacementPattern):
         IdentityN            Identity  Identity
         /      \                |         |
 output_0        output_1    output_0  output_1
+
+    ATTENTION: not all in/outputs of the IdentityN may survive the Model Optimizer pipeline,
+    and this breaks the original operation semantics.
+    For example, output_1 may not be used during network output computations.
+    To handle such unused in/output ports we disconnect the corresponding out/input port.
""" enabled = True @@ -41,12 +46,20 @@ def replace_identityN(node: Node): dtypes = node.data_types for idx, port in node.in_ports().items(): - assert node.is_out_port_connected(idx), 'IdentityN {} has inconsistent input and output ports'.format(name) + if not node.is_in_port_connected(idx) or not node.is_out_port_connected(idx): + # ATTENTION section in the description above + continue assert idx < len(dtypes), 'IdentityN {} has inconsistent `data_types` attribute {}'.format(name, dtypes) identity = Identity(graph, {'name': '{}/{}_port'.format(name, idx), 'data_type': dtypes[idx]}).create_node() port.get_connection().set_destination(identity.in_port(0)) node.out_port(idx).get_connection().set_source(identity.out_port(0)) + # ATTENTION section in the description above + for in_port in node.in_ports().values(): + in_port.disconnect() + for out_port in node.out_ports().values(): + out_port.disconnect() + def find_and_replace_pattern(self, graph: Graph): for identityN in graph.get_op_nodes(op='IdentityN'): self.replace_identityN(identityN) diff --git a/model-optimizer/extensions/front/tf/identityN_to_identity_test.py b/model-optimizer/extensions/front/tf/identityN_to_identity_test.py index f6422ce8ff8bc2..71571d7783a8cf 100644 --- a/model-optimizer/extensions/front/tf/identityN_to_identity_test.py +++ b/model-optimizer/extensions/front/tf/identityN_to_identity_test.py @@ -61,3 +61,20 @@ def test_identityN(self): (flag, resp) = compare_graphs(graph, graph_ref, 'output0', check_op_attrs=True) self.assertTrue(flag, resp) + + def test_identityN_unused_ports(self): + graph = build_graph(nodes, [ + *connect('placeholder_0', '0:identityN'), + *connect('placeholder_1', '1:identityN'), + *connect('identityN:0', 'output0'), + ], nodes_with_edges_only=True) + + IdentityN_to_Identity().find_and_replace_pattern(graph) + + graph_ref = build_graph(nodes, [ + *connect('placeholder_0', 'identity0'), + *connect('identity0', 'output0'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output0', check_op_attrs=True) + self.assertTrue(flag, resp) From e290b14ab147596f6b5607786ce383283a35b7b8 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Fri, 29 May 2020 09:15:47 +0300 Subject: [PATCH 09/24] [ MO Interpolate ] Fixing broken model reshape-ability (#619) --- model-optimizer/automation/package_BOM.txt | 1 + .../extensions/back/InterpolateReshape.py | 154 ++++++++++++++++++ .../back/InterpolateReshape_test.py | 97 +++++++++++ 3 files changed, 252 insertions(+) create mode 100644 model-optimizer/extensions/back/InterpolateReshape.py create mode 100644 model-optimizer/extensions/back/InterpolateReshape_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index e4da215ad3cda9..2d19e07fbcb815 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -29,6 +29,7 @@ extensions/back/GatherNormalizer.py extensions/back/GroupedConvWeightsNormalize.py extensions/back/I64ToI32.py extensions/back/insert_compatibility_l2normalization.py +extensions/back/InterpolateReshape.py extensions/back/InterpolateToInterpOrResample.py extensions/back/kaldi_remove_memory_output.py extensions/back/LeakyReLUMutation.py diff --git a/model-optimizer/extensions/back/InterpolateReshape.py b/model-optimizer/extensions/back/InterpolateReshape.py new file mode 100644 index 00000000000000..e1ecbebbcd8a9b --- /dev/null +++ b/model-optimizer/extensions/back/InterpolateReshape.py @@ -0,0 +1,154 @@ +""" + 
Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import numpy as np + +from extensions.ops.elementwise import Mul +from extensions.ops.gather import Gather +from mo.back.replacement import BackReplacementPattern +from mo.front.caffe.extractors.utils import get_canonical_axis_index +from mo.front.common.partial_infer.utils import int64_array +from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input +from mo.graph.graph import Graph +from mo.ops.shape import Shape + + +class InterpolateConcat(BackReplacementPattern): + """ + Replaces hard-coded 1-port input of Interpolate with reshape-able sub-graph using the following Concat inputs + + BEFORE: + input Const + shape=[1, 3, 30, 40] value=[60, 160] + \ / + Interpolate(axes=(2, 3)) input_1 + shape=[1, 3, 60, 160] shape=[1, 4, 60, 160] + \ / + Concat(axis=1) + shape=[1, 7, 60, 160] + AFTER: + input + shape=[1, 3, 30, 40] input_1 + | shape=[1, 4, 60, 160] + | / | + | ShapeOf | + | | | + | Gather | + | indices=(2, 3); axis=0 | + \ | | + Interpolate(axes=(2, 3)) | + shape=[1, 3, 60, 160] | + \ / + Concat(axis=1) + shape=[1, 7, 60, 160] + + """ + enabled = True + graph_condition = [lambda graph: graph.graph['cmd_params'].keep_shape_ops] + force_shape_inference = True + id = 'reshape_interpolate_through_concat' + + @staticmethod + def make_interpolate_reshapeable(interpolate, concat): + assert interpolate.soft_get('type') == 'Interpolate' + assert concat.soft_get('type') == 'Concat' + + output_shape = interpolate.out_port(0).data.get_shape() + + interp_axes = [get_canonical_axis_index(output_shape, axis) for axis in interpolate.axes] + concat_axis = get_canonical_axis_index(output_shape, concat.axis) + if concat_axis in interp_axes: + return + + concat_srcs = [port.get_source() for port in concat.in_ports().values()] + non_interp_concat_srcs = [src for src in concat_srcs if src.node.soft_get('type') != 'Interpolate'] + if len(non_interp_concat_srcs) == 0: + return + + graph = interpolate.graph + src = non_interp_concat_srcs[0] + + shape = Shape(graph, {'name': src.node.soft_get('name', src.node.id) + '/Shape'}).create_node() + shape.in_port(0).connect(src) + gather = create_op_with_const_inputs(graph, Gather, {1: np.array(interpolate.axes, dtype=np.int32), 2: int64_array(0)}, + {'name': shape.name + '/Gathered'}, shape) + interpolate.in_port(1).get_connection().set_source(gather.out_port(0)) + + def find_and_replace_pattern(self, graph: Graph): + for interpolate in graph.get_op_nodes(type='Interpolate'): + if interpolate.in_port(1).get_source().node.soft_get('type') != 'Const': + continue + dsts = interpolate.out_port(0).get_destinations() + if len(dsts) == 1 and dsts[0].node.soft_get('type') == 'Concat': + self.make_interpolate_reshapeable(interpolate, dsts[0].node) + + +class InterpolateReshapeWA(BackReplacementPattern): + """ + Replaces hard-coded 1-port input of Interpolate with reshape-able sub-graph. 
+ WARNING: Could cause troubles if model has hard-coded Interpolate intentionally -- rare situation + + BEFORE: + input Const + shape=[1, 3, 30, 40] value=[60, 160] + \ / + Interpolate(axes=(2, 3)) + shape=[1, 3, 60, 160] + + AFTER: + input + shape=[1, 3, 30, 40] + | \ + | ShapeOf + | | + | Gather Const + | indices=(2, 3); axis=0 value=[2, 4] + | \ / + | Multiply + | / + Interpolate(axes=(2, 3)) + shape=[1, 3, 60, 160] + """ + enabled = False + graph_condition = [lambda graph: graph.graph['cmd_params'].keep_shape_ops] + force_shape_inference = True + id = 'reshape_interpolate_wa' + + def run_after(self): + return [InterpolateConcat] + + @staticmethod + def make_interpolate_reshapeable(interpolate): + assert interpolate.soft_get('type') == 'Interpolate' + axes = interpolate.axes + input_shape = interpolate.in_port(0).data.get_shape() + output_shape = interpolate.out_port(0).data.get_shape() + if not np.all(np.remainder(output_shape, input_shape) == 0) and \ + not np.all(np.remainder(input_shape, output_shape) == 0): + return + graph = interpolate.graph + name = interpolate.soft_get('name', interpolate.id) + shape = Shape(graph, {'name': name + '/ShapeOf'}).create_node() + shape.in_port(0).connect(interpolate.in_port(0).get_source()) + gather = create_op_with_const_inputs(graph, Gather, {1: np.array(axes, dtype=np.int32), 2: int64_array(0)}, + {'name': shape.name + '/Gathered'}, shape) + multipliers = output_shape[axes] / input_shape[axes] + mul = create_op_node_with_second_input(graph, Mul, multipliers, {'name': gather.name + '/Multiplied'}, gather) + interpolate.in_port(1).get_connection().set_source(mul.out_port(0)) + + def find_and_replace_pattern(self, graph: Graph): + for interpolate in graph.get_op_nodes(type='Interpolate'): + if interpolate.in_port(1).get_source().node.soft_get('type') == 'Const': + self.make_interpolate_reshapeable(interpolate) diff --git a/model-optimizer/extensions/back/InterpolateReshape_test.py b/model-optimizer/extensions/back/InterpolateReshape_test.py new file mode 100644 index 00000000000000..f793a4b592fceb --- /dev/null +++ b/model-optimizer/extensions/back/InterpolateReshape_test.py @@ -0,0 +1,97 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import unittest +from argparse import Namespace + +import numpy as np + +from extensions.back.InterpolateReshape import InterpolateReshapeWA, InterpolateConcat +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \ + connect_data + +nodes = { + **regular_op_with_shaped_data('placeholder', [1, 3, 30, 40], {'type': 'Parameter'}), + **valued_const_with_data('out_shape', np.array([60, 160])), + + **regular_op_with_shaped_data('interpolate', [1, 3, 60, 160], {'type': 'Interpolate', 'axes': [2, 3]}), + + **regular_op_with_shaped_data('shape', [4], {'type': 'ShapeOf'}), + **valued_const_with_data('indices', np.array([2, 3])), + **valued_const_with_data('axis', np.array(0)), + **regular_op_with_shaped_data('gather', [2], {'type': 'Gather'}), + + **valued_const_with_data('multiplier', np.array([2, 4])), + **regular_op_with_shaped_data('mul', [2], {'type': 'Multiply'}), + + **regular_op_with_shaped_data('placeholder_1', [1, 3, 60, 160], {'type': 'Parameter'}), + **regular_op_with_shaped_data('concat', [1, 7, 60, 160], {'type': 'Concat', 'axis': 1}), + + **result(), +} + + +class TestInterpolateReshapeWA(unittest.TestCase): + def test_interpolate_reshape_graph_comparison(self): + graph = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('out_shape', '1:interpolate'), + *connect('interpolate', 'output'), + ], nodes_with_edges_only=True) + InterpolateReshapeWA().find_and_replace_pattern(graph) + graph.graph['cmd_params'] = Namespace(keep_shape_ops=True) + graph.clean_up() + graph_ref = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect_data('placeholder', 'shape'), + *connect('shape', '0:gather'), + *connect('indices', '1:gather'), + *connect('axis', '2:gather'), + *connect('gather', '0:mul'), + *connect('multiplier', '1:mul'), + *connect('mul', '1:interpolate'), + *connect('interpolate', 'output'), + ], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + +class TestInterpolateConcat(unittest.TestCase): + def test_interpolate_concat_reshape_graph_comparison(self): + graph = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('out_shape', '1:interpolate'), + *connect('interpolate', '0:concat'), + *connect('placeholder_1', '1:concat'), + *connect('concat', 'output'), + ], nodes_with_edges_only=True) + InterpolateConcat().find_and_replace_pattern(graph) + graph.graph['cmd_params'] = Namespace(keep_shape_ops=True) + graph.clean_up() + graph_ref = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('placeholder_1', 'shape'), + *connect('shape', '0:gather'), + *connect('indices', '1:gather'), + *connect('axis', '2:gather'), + *connect('gather', '1:interpolate'), + *connect('interpolate', '0:concat'), + *connect_data('placeholder_1', '1:concat'), + *connect('concat', 'output'), + ], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) From 67d733d5a8db85efffbfd0d713746f8e1f099efc Mon Sep 17 00:00:00 2001 From: "Gladilov, Gleb" Date: Fri, 29 May 2020 09:32:10 +0300 Subject: [PATCH 10/24] Enables VPU maintainers notification in case of PR to VPU related folders and files (#667) --- CODEOWNERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 
b3bd89148d9ce9..9d19cd384ebfa5 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -33,6 +33,14 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tools/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers # IE GNA: /inference-engine/src/gna_plugin/ @openvinotoolkit/openvino-ie-gna-maintainers From 09192b804e071c0205f504f761a087b184c60fdb Mon Sep 17 00:00:00 2001 From: Artyom Anokhov Date: Fri, 29 May 2020 13:50:17 +0300 Subject: [PATCH 11/24] [OpenVINO scripts] Fixed *.sh files index from 644 to 755 (#664) * Fixed *.sh files index from 644 to 755 * Added convert.py executable permission --- inference-engine/samples/build_samples.sh | 0 inference-engine/scripts/dependencies.sh | 0 inference-engine/scripts/run_tests_myriad_multistick.sh | 0 inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh | 0 inference-engine/thirdparty/fluid/check.sh | 0 inference-engine/thirdparty/fluid/checksum.sh | 0 .../thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh | 0 inference-engine/thirdparty/fluid/update.sh | 0 install_dependencies.sh | 0 model-optimizer/mo/utils/convert.py | 0 ngraph/maint/apply-code-format.sh | 0 ngraph/maint/bash_lib.sh | 0 ngraph/maint/check-code-format.sh | 0 ngraph/maint/clang_format_lib.sh | 0 ngraph/test/update_convolution_reference.sh | 0 ngraph/test/update_dyn_replace_slice_reference.sh | 0 ngraph/test/update_dyn_slice_reference.sh | 0 scripts/demo/demo_benchmark_app.sh | 0 scripts/demo/demo_security_barrier_camera.sh | 0 scripts/demo/demo_squeezenet_download_convert_run.sh | 0 scripts/demo/utils.sh | 0 scripts/install_dependencies/install_4_14_kernel.sh | 0 scripts/install_dependencies/install_GST_dependencies.sh | 0 scripts/install_dependencies/install_NCS_udev_rules.sh | 0 scripts/install_dependencies/install_NEO_OCL_driver.sh | 0 scripts/install_dependencies/install_openvino_dependencies.sh | 0 scripts/setupvars/setupvars.sh | 0 27 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 inference-engine/samples/build_samples.sh mode change 100644 => 100755 inference-engine/scripts/dependencies.sh mode change 100644 => 100755 inference-engine/scripts/run_tests_myriad_multistick.sh mode change 100644 => 100755 inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/check.sh mode change 100644 => 100755 
inference-engine/thirdparty/fluid/checksum.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/update.sh mode change 100644 => 100755 install_dependencies.sh mode change 100644 => 100755 model-optimizer/mo/utils/convert.py mode change 100644 => 100755 ngraph/maint/apply-code-format.sh mode change 100644 => 100755 ngraph/maint/bash_lib.sh mode change 100644 => 100755 ngraph/maint/check-code-format.sh mode change 100644 => 100755 ngraph/maint/clang_format_lib.sh mode change 100644 => 100755 ngraph/test/update_convolution_reference.sh mode change 100644 => 100755 ngraph/test/update_dyn_replace_slice_reference.sh mode change 100644 => 100755 ngraph/test/update_dyn_slice_reference.sh mode change 100644 => 100755 scripts/demo/demo_benchmark_app.sh mode change 100644 => 100755 scripts/demo/demo_security_barrier_camera.sh mode change 100644 => 100755 scripts/demo/demo_squeezenet_download_convert_run.sh mode change 100644 => 100755 scripts/demo/utils.sh mode change 100644 => 100755 scripts/install_dependencies/install_4_14_kernel.sh mode change 100644 => 100755 scripts/install_dependencies/install_GST_dependencies.sh mode change 100644 => 100755 scripts/install_dependencies/install_NCS_udev_rules.sh mode change 100644 => 100755 scripts/install_dependencies/install_NEO_OCL_driver.sh mode change 100644 => 100755 scripts/install_dependencies/install_openvino_dependencies.sh mode change 100644 => 100755 scripts/setupvars/setupvars.sh diff --git a/inference-engine/samples/build_samples.sh b/inference-engine/samples/build_samples.sh old mode 100644 new mode 100755 diff --git a/inference-engine/scripts/dependencies.sh b/inference-engine/scripts/dependencies.sh old mode 100644 new mode 100755 diff --git a/inference-engine/scripts/run_tests_myriad_multistick.sh b/inference-engine/scripts/run_tests_myriad_multistick.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh b/inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/check.sh b/inference-engine/thirdparty/fluid/check.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/checksum.sh b/inference-engine/thirdparty/fluid/checksum.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh b/inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/update.sh b/inference-engine/thirdparty/fluid/update.sh old mode 100644 new mode 100755 diff --git a/install_dependencies.sh b/install_dependencies.sh old mode 100644 new mode 100755 diff --git a/model-optimizer/mo/utils/convert.py b/model-optimizer/mo/utils/convert.py old mode 100644 new mode 100755 diff --git a/ngraph/maint/apply-code-format.sh b/ngraph/maint/apply-code-format.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/bash_lib.sh b/ngraph/maint/bash_lib.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/check-code-format.sh b/ngraph/maint/check-code-format.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/clang_format_lib.sh b/ngraph/maint/clang_format_lib.sh old mode 100644 new mode 100755 diff --git a/ngraph/test/update_convolution_reference.sh b/ngraph/test/update_convolution_reference.sh old mode 100644 new mode 100755 diff --git 
a/ngraph/test/update_dyn_replace_slice_reference.sh b/ngraph/test/update_dyn_replace_slice_reference.sh old mode 100644 new mode 100755 diff --git a/ngraph/test/update_dyn_slice_reference.sh b/ngraph/test/update_dyn_slice_reference.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_benchmark_app.sh b/scripts/demo/demo_benchmark_app.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_security_barrier_camera.sh b/scripts/demo/demo_security_barrier_camera.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_squeezenet_download_convert_run.sh b/scripts/demo/demo_squeezenet_download_convert_run.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/utils.sh b/scripts/demo/utils.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_4_14_kernel.sh b/scripts/install_dependencies/install_4_14_kernel.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_GST_dependencies.sh b/scripts/install_dependencies/install_GST_dependencies.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_NCS_udev_rules.sh b/scripts/install_dependencies/install_NCS_udev_rules.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_NEO_OCL_driver.sh b/scripts/install_dependencies/install_NEO_OCL_driver.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_openvino_dependencies.sh b/scripts/install_dependencies/install_openvino_dependencies.sh old mode 100644 new mode 100755 diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh old mode 100644 new mode 100755 From a4f13ae9fe0f65f5eed67689e1a5ba461df9dfa7 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Fri, 29 May 2020 14:09:20 +0300 Subject: [PATCH 12/24] fix constant folding of Concat op (#675) --- ngraph/src/ngraph/op/concat.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ngraph/src/ngraph/op/concat.cpp b/ngraph/src/ngraph/op/concat.cpp index a4ebe34c028081..5a54eac7717bb5 100644 --- a/ngraph/src/ngraph/op/concat.cpp +++ b/ngraph/src/ngraph/op/concat.cpp @@ -191,6 +191,8 @@ namespace break; TYPE_CASE(u64)(args, out, concatenation_axis); break; + TYPE_CASE(f16)(args, out, concatenation_axis); + break; TYPE_CASE(f32)(args, out, concatenation_axis); break; TYPE_CASE(f64)(args, out, concatenation_axis); From 5f8f9ec108abd958ee954cc0dd04c433d4d96182 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Fri, 29 May 2020 13:29:18 +0200 Subject: [PATCH 13/24] [nGraph] Reorder nGraph LSTMSequence inputs and outputs dimensions (#560) * Reorder nGraph LSTMSequence input/outpt dimensions * Update nGraph pythonAPI for LSTMSequence * Reorder axes in ONNX importer LSTM * Tests update * Fix clang warning * Use opset3 namespace * Style apply * Tests update * Use opset1 namespace * Remove usage of GetOutputElement in ONNX importer LSTM * Remove opset0 header * Use Node::output() --- ngraph/python/src/ngraph/ops.py | 6 +- ngraph/python/test/ngraph/test_create_op.py | 18 +- .../ngraph/frontend/onnx_import/op/lstm.cpp | 34 ++-- ngraph/src/ngraph/op/fused/lstm_sequence.cpp | 184 +++++++++--------- ngraph/src/ngraph/op/fused/lstm_sequence.hpp | 4 +- ngraph/test/attributes.cpp | 29 ++- ngraph/test/onnx/onnx_import_rnn.in.cpp | 1 + ngraph/test/type_prop/lstm_sequence.cpp | 98 ++++++++-- 8 files changed, 231 insertions(+), 143 deletions(-) diff --git a/ngraph/python/src/ngraph/ops.py b/ngraph/python/src/ngraph/ops.py index 58c0f8970bfb08..51a299ef8f97c8 100644 --- 
a/ngraph/python/src/ngraph/ops.py +++ b/ngraph/python/src/ngraph/ops.py @@ -472,11 +472,11 @@ def lstm_sequence( ) -> Node: """Return a node which performs LSTMSequence operation. - :param X: The input tensor. Shape: [seq_length, batch_size, input_size]. + :param X: The input tensor. Shape: [batch_size, seq_length, input_size]. :param initial_hidden_state: The hidden state tensor. - Shape: [num_directions, batch_size, hidden_size]. + Shape: [batch_size, num_directions, hidden_size]. :param initial_cell_state: The cell state tensor. - Shape: [num_directions, batch_size, hidden_size]. + Shape: [batch_size, num_directions, hidden_size]. :param sequence_lengths: Specifies real sequence lengths for each batch element. Shape: [batch_size]. Integer type. :param W: Tensor with weights for matrix multiplication operation with input portion of data. diff --git a/ngraph/python/test/ngraph/test_create_op.py b/ngraph/python/test/ngraph/test_create_op.py index 662dc6a047f7be..abb50adce9e26d 100644 --- a/ngraph/python/test/ngraph/test_create_op.py +++ b/ngraph/python/test/ngraph/test_create_op.py @@ -258,9 +258,9 @@ def test_lstm_sequence_operator_bidirectional(dtype): num_directions = 2 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] @@ -323,9 +323,9 @@ def test_lstm_sequence_operator_reverse(dtype): num_directions = 1 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] @@ -389,9 +389,9 @@ def test_lstm_sequence_operator_forward(dtype): num_directions = 1 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] diff --git a/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp b/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp index 08e472e9eef942..16f0c45b49268d 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp +++ b/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp @@ -24,13 +24,13 @@ #include "default_opset.hpp" #include "exceptions.hpp" #include "lstm.hpp" +#include "ngraph/builder/reshape.hpp" #include "ngraph/builder/split.hpp" #include "ngraph/frontend/onnx_import/op/lstm.hpp" #include "ngraph/op/add.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/op/fused/lstm_sequence.hpp" #include "ngraph/op/get_output_element.hpp" -#include "ngraph/opsets/opset0.hpp" #include "ngraph/shape.hpp" #include 
"ngraph/type/element_type.hpp" @@ -71,7 +71,8 @@ namespace ngraph // ----- Mandatory inputs ------ // Packed input sequences. Shape: [seq_length, batch_size, input_size] - m_map[LSTMInput::LSTM_INPUT_X] = ng_inputs.at(0); + m_map[LSTMInput::LSTM_INPUT_X] = + builder::opset1::reorder_axes(ng_inputs.at(0), {1, 0, 2}); // Weight tensor for the gates. // Shape: [num_directions, 4*hidden_size, input_size] m_map[LSTMInput::LSTM_INPUT_W] = ng_inputs.at(1); @@ -82,7 +83,7 @@ namespace ngraph const std::size_t hidden_size = m_map[LSTMInput::LSTM_INPUT_R]->get_shape().back(); const std::size_t batch_size = - m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(1); + m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(0); const std::size_t num_directions = m_map[LSTMInput::LSTM_INPUT_W]->get_shape().front(); @@ -115,33 +116,35 @@ namespace ngraph Shape{batch_size}, std::vector( batch_size, - m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(0))); + m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(1))); } // The initial value of the hidden. // Shape [num_directions, batch_size, hidden_size] if (ng_inputs.size() > 5 && !ng_inputs.at(5)->is_null()) { - m_map[LSTMInput::LSTM_INPUT_INIT_H] = ng_inputs.at(5); + m_map[LSTMInput::LSTM_INPUT_INIT_H] = + builder::opset1::reorder_axes(ng_inputs.at(5), {1, 0, 2}); } else { m_map[LSTMInput::LSTM_INPUT_INIT_H] = default_opset::Constant::create( element::f32, - Shape{num_directions, batch_size, hidden_size}, - std::vector(num_directions * batch_size * hidden_size, 0.f)); + Shape{batch_size, num_directions, hidden_size}, + std::vector(batch_size * num_directions * hidden_size, 0.f)); } // The initial value of the cell. // Shape [num_directions, batch_size, hidden_size] if (ng_inputs.size() > 6 && !ng_inputs.at(6)->is_null()) { - m_map[LSTMInput::LSTM_INPUT_INIT_C] = ng_inputs.at(6); + m_map[LSTMInput::LSTM_INPUT_INIT_C] = + builder::opset1::reorder_axes(ng_inputs.at(6), {1, 0, 2}); } else { m_map[LSTMInput::LSTM_INPUT_INIT_C] = default_opset::Constant::create( element::f32, - Shape{num_directions, batch_size, hidden_size}, - std::vector(num_directions * batch_size * hidden_size, 0.f)); + Shape{batch_size, num_directions, hidden_size}, + std::vector(batch_size * num_directions * hidden_size, 0.f)); } // The weight tensor for peepholes. 
Shape [num_directions, 3*hidden_size] if (ng_inputs.size() > 7 && !ng_inputs.at(7)->is_null()) @@ -239,9 +242,14 @@ namespace ngraph attributes.m_activations, attributes.m_clip_threshold, attributes.m_input_forget); - return {std::make_shared(lstmSequence, 0), - std::make_shared(lstmSequence, 1), - std::make_shared(lstmSequence, 2)}; + + const auto Y = lstmSequence->output(0); + const auto Y_h = lstmSequence->output(1); + const auto Y_c = lstmSequence->output(2); + + return {builder::opset1::reorder_axes(Y, {2, 1, 0, 3}), + builder::opset1::reorder_axes(Y_h, {1, 0, 2}), + builder::opset1::reorder_axes(Y_c, {1, 0, 2})}; } } // namespace set_1 diff --git a/ngraph/src/ngraph/op/fused/lstm_sequence.cpp b/ngraph/src/ngraph/op/fused/lstm_sequence.cpp index 2e411d7fc6603f..a8b758446c84a0 100644 --- a/ngraph/src/ngraph/op/fused/lstm_sequence.cpp +++ b/ngraph/src/ngraph/op/fused/lstm_sequence.cpp @@ -20,19 +20,13 @@ #include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/reshape.hpp" #include "ngraph/builder/split.hpp" -#include "ngraph/frontend/onnx_import/utils/reshape.hpp" -#include "ngraph/op/concat.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/fused/lstm_cell.hpp" -#include "ngraph/op/get_output_element.hpp" -#include "ngraph/op/greater.hpp" -#include "ngraph/op/reverse_sequence.hpp" -#include "ngraph/op/select.hpp" + +#include "ngraph/opsets/opset1.hpp" using namespace ngraph; using namespace std; -constexpr NodeTypeInfo op::LSTMSequence::type_info; +constexpr NodeTypeInfo op::v0::LSTMSequence::type_info; bool ngraph::op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("hidden_size", m_hidden_size); @@ -46,7 +40,7 @@ bool ngraph::op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) visitor.on_attribute("weights_format", m_weights_format); return true; } -NodeVector op::LSTMSequence::decompose_op() const +NodeVector op::v0::LSTMSequence::decompose_op() const { NodeVector results; if (m_direction == direction::FORWARD || m_direction == direction::REVERSE) @@ -60,55 +54,55 @@ NodeVector op::LSTMSequence::decompose_op() const // Stack together respective outputs from both forward and reverse passes.
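[Editor's note: with the batch-first layout introduced by this patch, the stacking described above happens along axis 1. A minimal, self-contained sketch (assuming the ngraph opset1 API; the helper name and shapes are illustrative, not part of the patch):

    #include <memory>
    #include "ngraph/opsets/opset1.hpp"
    using namespace ngraph;
    // Each direction yields Y_h of shape [batch_size, 1, hidden_size] after
    // expand_dims; concatenating along axis 1 (the num_directions axis)
    // produces [batch_size, 2, hidden_size].
    std::shared_ptr<Node> stack_directions(const Output<Node>& fwd,
                                           const Output<Node>& rev)
    {
        return std::make_shared<opset1::Concat>(OutputVector{fwd, rev}, 1);
    }
]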
shared_ptr Y{ - make_shared(NodeVector{fwd_results.at(0), rev_results.at(0)}, 1)}; + make_shared(NodeVector{fwd_results.at(0), rev_results.at(0)}, 1)}; shared_ptr Y_h{ - make_shared(NodeVector{fwd_results.at(1), rev_results.at(1)}, 0)}; + make_shared(NodeVector{fwd_results.at(1), rev_results.at(1)}, 1)}; shared_ptr Y_c{ - make_shared(NodeVector{fwd_results.at(2), rev_results.at(2)}, 0)}; + make_shared(NodeVector{fwd_results.at(2), rev_results.at(2)}, 1)}; results = NodeVector{Y, Y_h, Y_c}; } return results; } -shared_ptr op::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const +shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); if (new_args.size() == 8) { - return make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - new_args.at(7), // P - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip_threshold, - m_input_forget); + return make_shared(new_args.at(0), // X + new_args.at(1), // initial_hidden_state + new_args.at(2), // initial_cell_state + new_args.at(3), // sequence_lengths + new_args.at(4), // W + new_args.at(5), // R + new_args.at(6), // B + new_args.at(7), // P + m_hidden_size, + m_direction, + m_weights_format, + m_activations_alpha, + m_activations_beta, + m_activations, + m_clip_threshold, + m_input_forget); } else if (new_args.size() == 7) { - return make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip_threshold, - m_input_forget); + return make_shared(new_args.at(0), // X + new_args.at(1), // initial_hidden_state + new_args.at(2), // initial_cell_state + new_args.at(3), // sequence_lengths + new_args.at(4), // W + new_args.at(5), // R + new_args.at(6), // B + m_hidden_size, + m_direction, + m_weights_format, + m_activations_alpha, + m_activations_beta, + m_activations, + m_clip_threshold, + m_input_forget); } else { @@ -116,46 +110,44 @@ shared_ptr op::LSTMSequence::clone_with_new_inputs(const OutputVector& new } } -shared_ptr op::LSTMSequence::get_masked_node(const Output& data, - int32_t time_step, - size_t batch_axis, - const Output& default_value) const +shared_ptr op::v0::LSTMSequence::get_masked_node(const Output& data, + int32_t time_step, + size_t batch_axis, + const Output& default_value) const { Output mask_value = default_value; // Create zero mask value node. if (!mask_value.get_node_shared_ptr()) { - mask_value = op::Constant::create(data.get_element_type(), - data.get_shape(), - vector(shape_size(data.get_shape()), 0.f)); + mask_value = opset1::Constant::create(data.get_element_type(), + data.get_shape(), + vector(shape_size(data.get_shape()), 0.f)); } // Create predicate nodes. The condition is whether current time step value // is greater than sequence length for respective batch inputs. 
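[Editor's note: an illustrative sketch of the Greater + Select masking that the diff below builds, assuming opset1; the helper name is hypothetical:

    #include <memory>
    #include "ngraph/opsets/opset1.hpp"
    using namespace ngraph;
    // Batches whose sequence length is already exceeded at this time step
    // receive the mask value (zeros) instead of the computed data.
    std::shared_ptr<Node> mask_past_end(const Output<Node>& time_step_node,
                                        const Output<Node>& seq_lengths,
                                        const Output<Node>& mask_value,
                                        const Output<Node>& data)
    {
        auto past_end = std::make_shared<opset1::Greater>(time_step_node, seq_lengths);
        return std::make_shared<opset1::Select>(past_end, mask_value, data);
    }
]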
- shared_ptr curr_time_step_node = op::Constant::create( + shared_ptr curr_time_step_node = opset1::Constant::create( element::i32, data.get_shape(), vector(shape_size(data.get_shape()), time_step)); - Output batch_seq_length = - builder::legacy_broadcast_for_binary_operation( - curr_time_step_node, input_value(3).get_node_shared_ptr(), batch_axis) - .at(1); + Output batch_seq_length = builder::opset1::legacy_broadcast_for_binary_operation( + curr_time_step_node, input_value(3).get_node_shared_ptr(), batch_axis); // Create mask node deciding whether or not to mask batch data. shared_ptr mask_condition = - make_shared(curr_time_step_node, batch_seq_length); + make_shared(curr_time_step_node, batch_seq_length); // Select values depending on mask_condition. // Select(<condition>, <true_value>, <false_value>) - return make_shared(mask_condition, mask_value, data); + return make_shared(mask_condition, mask_value, data); } -NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const +NodeVector op::v0::LSTMSequence::lstm_pass(bool is_reverse) const { // ------ VARIABLE'S NAMES AND ACRONYM DEFINITIONS ------ // The names used below are analogous to the ones used in ONNX documentation. // // ------ INPUTS ------ - // X - The input tensor. [seq_length, batch_size, input_size] + // X - The input tensor. [batch_size, seq_length, input_size] // W - The weight tensor. [num_directions, 4*hidden_size, input_size] // R - The recurrence weight tensor. [num_directions, 4*hidden_size, hidden_size] // B - The bias tensor for input gate. [num_directions, 8*hidden_size] @@ -167,14 +159,14 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const // c - cell gate // t - time step (t-1 means previous time step) // ------ VARIABLE NAMES ------ - // H_t - Hidden state vector at current time step. - // C_t - Cell state vector at current time step. + // H_t - Hidden state vector at current time step. [batch_size, num_directions, hidden_size] + // C_t - Cell state vector at current time step. [batch_size, num_directions, hidden_size] // h_list - The list of hidden states at all processed time steps. NodeVector h_list; shared_ptr X = input_value(0).get_node_shared_ptr(); - shared_ptr H_t = prepare_input(input_value(1), is_reverse); - shared_ptr C_t = prepare_input(input_value(2), is_reverse); + shared_ptr H_t = prepare_input(input_value(1), is_reverse, 1); + shared_ptr C_t = prepare_input(input_value(2), is_reverse, 1); shared_ptr seq_lengths = input_value(3).get_node_shared_ptr(); shared_ptr W = prepare_input(input_value(4), is_reverse); shared_ptr R = prepare_input(input_value(5), is_reverse); @@ -183,34 +175,34 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const if (is_reverse) { - X = make_shared(X, seq_lengths, 1 /*batch_axis*/, 0 /*seq_axis*/); + X = make_shared(X, seq_lengths, 0 /*batch_axis*/, 1 /*seq_axis*/); } - NodeVector in_seqs = builder::split(X, X->get_shape().at(0)); + NodeVector in_seqs = builder::opset1::split(X, X->get_shape().at(1), 1); for (auto& in_x : in_seqs) { - // remove first empty dim, after above split. - in_x = builder::squeeze(in_x); + // Remove empty dim, after above split.
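[Editor's note: shape bookkeeping for the loop below, as a hypothetical standalone sketch using the same builder helpers the patch switches to:

    #include "ngraph/builder/reshape.hpp"
    #include "ngraph/builder/split.hpp"
    using namespace ngraph;
    // Batch-first X is [batch_size, seq_length, input_size]; splitting along
    // axis 1 yields seq_length slices of [batch_size, 1, input_size], and
    // squeezing axis 1 gives the [batch_size, input_size] input of one step.
    NodeVector per_step_slices(const std::shared_ptr<Node>& X, size_t seq_length)
    {
        NodeVector slices = builder::opset1::split(X, seq_length, 1);
        for (auto& s : slices)
            s = builder::opset1::squeeze(s, {1});
        return slices;
    }
]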
+ in_x = builder::opset1::squeeze(in_x, {1}); } int32_t time_step{1}; for (const auto& in_x : in_seqs) { - shared_ptr lstm_cell = make_shared(in_x, - H_t, - C_t, - W, - R, - B, - P, - m_hidden_size, - m_weights_format, - m_activations, - m_activations_alpha, - m_activations_beta, - m_clip_threshold, - m_input_forget); + shared_ptr lstm_cell = make_shared(in_x, + H_t, + C_t, + W, + R, + B, + P, + m_hidden_size, + m_weights_format, + m_activations, + m_activations_alpha, + m_activations_beta, + m_clip_threshold, + m_input_forget); Output H = lstm_cell->output(0); Output C = lstm_cell->output(1); @@ -220,7 +212,7 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const // Mask hidden state tensor in order to handle mixed sequence lengths. // This results in zeroing out values in batches with sequence shorter // than current time_step. - h_list.push_back(get_masked_node(builder::expand_dims(H), time_step, 1)); + h_list.push_back(get_masked_node(builder::opset1::expand_dims(H, 1), time_step, 0)); // Reference implementation in ONNX Runtime doesn't mask values of Y_h // and Y_c outputs, thus here we make sure that only appropriate batches // (with respect to their sequence lengths) are updated. Those batches which @@ -230,36 +222,38 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const time_step++; } // The tensor that concats all the intermediate output values of the hidden. - // It has shape [seq_length, batch_size, hidden_size] - shared_ptr Y{make_shared(h_list, 0)}; + // It has shape [batch_size, seq_length, hidden_size] + shared_ptr Y{make_shared(h_list, 1)}; // Get back the original order of the output data. if (is_reverse) { - Y = make_shared(Y, seq_lengths, 1 /*batch_axis*/, 0 /*seq_axis*/); + Y = make_shared(Y, seq_lengths, 0 /*batch_axis*/, 1 /*seq_axis*/); } // Expand Y so that it has the expected shape: - // [seq_length, num_directions, batch_size, hidden_size] - Y = builder::expand_dims(Y, 1); + // [batch_size, num_directions, seq_length, hidden_size] + Y = builder::opset1::expand_dims(Y, 1); // expand H_t and C_t so that they have the expected shape: - // [num_directions, batch_size, hidden_size] - auto Y_h = builder::expand_dims(H_t); - auto Y_c = builder::expand_dims(C_t); + // [batch_size, num_directions, hidden_size] + auto Y_h = builder::opset1::expand_dims(H_t, 1); + auto Y_c = builder::opset1::expand_dims(C_t, 1); return {Y, Y_h, Y_c}; } -shared_ptr op::LSTMSequence::prepare_input(Output node, bool is_reverse) const +shared_ptr op::v0::LSTMSequence::prepare_input(Output node, + bool is_reverse, + size_t num_direction_axis) const { // In bidirectional mode inputs are stacked together, so we must split them. shared_ptr tmp = node.get_node_shared_ptr(); if (m_direction == direction::BIDIRECTIONAL) { - tmp = builder::split(node, 2).at(is_reverse ? 1 : 0); + tmp = builder::opset1::split(node, 2, num_direction_axis).at(is_reverse ? 1 : 0); } // Since we have forward LSTM we can squeeze `num_directions` axis from inputs. - return builder::squeeze(tmp); + return builder::opset1::squeeze(tmp, {num_direction_axis}); } namespace ngraph diff --git a/ngraph/src/ngraph/op/fused/lstm_sequence.hpp b/ngraph/src/ngraph/op/fused/lstm_sequence.hpp index a583b4f3cd69d5..dc7b4209795cd8 100644 --- a/ngraph/src/ngraph/op/fused/lstm_sequence.hpp +++ b/ngraph/src/ngraph/op/fused/lstm_sequence.hpp @@ -173,7 +173,9 @@ namespace ngraph NodeVector lstm_pass(bool is_reverse = false) const; // Split(bi-directional) and squeeze input data to remove 'num_direction' dimension.
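[Editor's note: the num_direction_axis parameter added below defaults to 0 because the weight tensors (W/R/B) still carry num_directions in front, while the now batch-first state tensors carry it on axis 1. The call sites in lstm_pass() above reflect this design choice:

    auto H_t = prepare_input(input_value(1), is_reverse, 1);  // states:  [batch_size, num_directions, hidden_size]
    auto W   = prepare_input(input_value(4), is_reverse);     // weights: [num_directions, 4*hidden_size, input_size]
]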
- std::shared_ptr prepare_input(Output node, bool is_reverse) const; + std::shared_ptr prepare_input(Output node, + bool is_reverse, + size_t num_direction_axis = 0) const; std::vector m_activations_alpha; std::vector m_activations_beta; diff --git a/ngraph/test/attributes.cpp b/ngraph/test/attributes.cpp index 709b7b5fcc7905..264dbbfb4ccb2e 100644 --- a/ngraph/test/attributes.cpp +++ b/ngraph/test/attributes.cpp @@ -1104,16 +1104,27 @@ TEST(attributes, lstm_cell_op) TEST(attributes, lstm_sequence_op) { FactoryRegistry::get().register_factory(); - const auto X = make_shared(element::f32, Shape{1, 2, 4}); - const auto initial_hidden_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto initial_cell_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto sequence_lengths = make_shared(element::i32, Shape{2}); - const auto W = make_shared(element::f32, Shape{1, 12, 4}); - const auto R = make_shared(element::f32, Shape{1, 12, 3}); - const auto B = make_shared(element::f32, Shape{1, 12}); - const auto hidden_size = 3; - const auto lstm_direction = op::LSTMSequence::direction::FORWARD; + const auto batch_size = 4; + const auto num_directions = 2; + const auto seq_length = 8; + const auto input_size = 16; + const auto hidden_size = 64; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto lstm_direction = op::LSTMSequence::direction::BIDIRECTIONAL; const auto weights_format = op::LSTMWeightsFormat::ICOF; const std::vector activations_alpha = {1, 2, 3}; const std::vector activations_beta = {4, 5, 6}; diff --git a/ngraph/test/onnx/onnx_import_rnn.in.cpp b/ngraph/test/onnx/onnx_import_rnn.in.cpp index 626f65db0f9d9b..091afda954fc46 100644 --- a/ngraph/test/onnx/onnx_import_rnn.in.cpp +++ b/ngraph/test/onnx/onnx_import_rnn.in.cpp @@ -39,6 +39,7 @@ using namespace ngraph; static std::string s_manifest = "${MANIFEST}"; +// ONNX LSTM tests (implemented by nGraph LSTMCell and LSTMSequence) NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip) { auto function = onnx_import::import_onnx_model( diff --git a/ngraph/test/type_prop/lstm_sequence.cpp b/ngraph/test/type_prop/lstm_sequence.cpp index 0b5ff51a83b800..f3d814b92efd43 100644 --- a/ngraph/test/type_prop/lstm_sequence.cpp +++ b/ngraph/test/type_prop/lstm_sequence.cpp @@ -21,16 +21,28 @@ using namespace std; using namespace ngraph; -TEST(type_prop, lstm_sequence) +TEST(type_prop, lstm_sequence_forward) { - const auto X = make_shared(element::f32, Shape{1, 2, 4}); - const auto W = make_shared(element::f32, Shape{1, 12, 4}); - const auto R = make_shared(element::f32, Shape{1, 12, 3}); - const auto initial_hidden_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto initial_cell_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto B = make_shared(element::f32, Shape{1, 12}); - const auto sequence_lengths = make_shared(element::i32, Shape{2}); - const auto hidden_size = 3; + const auto 
batch_size = 8; + const auto num_directions = 1; + const auto seq_length = 6; + const auto input_size = 4; + const auto hidden_size = 128; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto lstm_direction = op::LSTMSequence::direction::FORWARD; const auto lstm_sequence = make_shared(X, initial_hidden_state, @@ -40,7 +52,7 @@ TEST(type_prop, lstm_sequence) R, B, hidden_size, - op::LSTMSequence::direction::FORWARD); + lstm_direction); EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); EXPECT_EQ(lstm_sequence->get_direction(), op::LSTMSequence::direction::FORWARD); EXPECT_EQ(lstm_sequence->get_weights_format(), op::LSTMWeightsFormat::IFCO); @@ -52,9 +64,69 @@ TEST(type_prop, lstm_sequence) EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); EXPECT_FALSE(lstm_sequence->get_input_forget()); EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{1, 1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(0), + (Shape{batch_size, num_directions, seq_length, hidden_size})); + EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); + EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); +} + +TEST(type_prop, lstm_sequence_bidirectional) +{ + const auto batch_size = 24; + const auto num_directions = 2; + const auto seq_length = 12; + const auto input_size = 8; + const auto hidden_size = 256; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto weights_format = op::LSTMWeightsFormat::FICO; + const auto lstm_direction = op::LSTMSequence::direction::BIDIRECTIONAL; + const std::vector activations_alpha = {2.7, 7.0, 32.367}; + const std::vector activations_beta = {0.0, 5.49, 6.0}; + const std::vector activations = {"tanh", "sigmoid", "sigmoid"}; + + const auto lstm_sequence = make_shared(X, + initial_hidden_state, + initial_cell_state, + sequence_lengths, + W, + R, + B, + hidden_size, + lstm_direction, + weights_format, + activations_alpha, + activations_beta, + activations); + EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); + 
EXPECT_EQ(lstm_sequence->get_direction(), op::LSTMSequence::direction::BIDIRECTIONAL); + EXPECT_EQ(lstm_sequence->get_weights_format(), op::LSTMWeightsFormat::FICO); + EXPECT_EQ(lstm_sequence->get_activations_alpha(), activations_alpha); + EXPECT_EQ(lstm_sequence->get_activations_beta(), activations_beta); + EXPECT_EQ(lstm_sequence->get_activations()[0], "tanh"); + EXPECT_EQ(lstm_sequence->get_activations()[1], "sigmoid"); + EXPECT_EQ(lstm_sequence->get_activations()[2], "sigmoid"); + EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); + EXPECT_FALSE(lstm_sequence->get_input_forget()); + EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(0), + (Shape{batch_size, num_directions, seq_length, hidden_size})); EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } From 011128cb543fbd5f8a0cf783547d3ff255df365d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 29 May 2020 14:45:59 +0300 Subject: [PATCH 14/24] Python: Fixed installation rules to install additional .so files generated from .pyx (#676) --- .../python/src/openvino/inference_engine/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index aa2a30c0555ce0..9ce70b546629d3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -14,6 +14,7 @@ set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCE}) +set(INSTALLED_TARGETS ${TARGET_NAME}) file(GLOB OTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx) @@ -26,6 +27,7 @@ foreach(PYX_FILE ${OTHER_SOURCES}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) + list(APPEND INSTALLED_TARGETS ${PYX_NAME}) endforeach() function(python_disable_deprecated_warnings) @@ -64,7 +66,7 @@ endif() # install -install(TARGETS ${TARGET_NAME} +install(TARGETS ${INSTALLED_TARGETS} RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} ARCHIVE DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) From be3b711972442435479136419a0f58b3713d4bc2 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Fri, 29 May 2020 15:44:12 +0300 Subject: [PATCH 15/24] Pre-processing(GAPI): AVX2/AVX512 implementation of 3C/4C Resize via universal intrinsics. 
(#612) --- .../ie_bridges/c/src/CMakeLists.txt | 6 + .../src/preprocessing/CMakeLists.txt | 6 + .../ie_preprocess_gapi_kernels_avx2.cpp | 234 +++++++++++++- .../ie_preprocess_gapi_kernels_avx512.cpp | 289 +++++++++++++++++- .../ie_preprocess_gapi_kernels_sse42.cpp | 222 ++++++++++++-- .../ie_preprocess_gapi_kernels.cpp | 58 +++- .../fluid_test_computations/CMakeLists.txt | 6 + .../thirdparty/ocv/opencv_hal_avx.hpp | 87 +++++- .../thirdparty/ocv/opencv_hal_avx512.hpp | 202 +++++++++++- .../thirdparty/ocv/opencv_hal_sse.hpp | 65 +++- 10 files changed, 1093 insertions(+), 82 deletions(-) diff --git a/inference-engine/ie_bridges/c/src/CMakeLists.txt b/inference-engine/ie_bridges/c/src/CMakeLists.txt index ef8527adf1cf9e..ab981fd7939366 100644 --- a/inference-engine/ie_bridges/c/src/CMakeLists.txt +++ b/inference-engine/ie_bridges/c/src/CMakeLists.txt @@ -21,6 +21,12 @@ target_include_directories(${TARGET_NAME} PUBLIC "${InferenceEngine_C_API_SOURCE add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) +# Workaround to avoid warnings caused by a bug in avx512intrin.h of GCC 5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + # export export(TARGETS ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake") diff --git a/inference-engine/src/preprocessing/CMakeLists.txt b/inference-engine/src/preprocessing/CMakeLists.txt index 9201a6ed53ffa0..adc52f065f1890 100644 --- a/inference-engine/src/preprocessing/CMakeLists.txt +++ b/inference-engine/src/preprocessing/CMakeLists.txt @@ -168,6 +168,12 @@ target_link_libraries(${TARGET_NAME} PRIVATE fluid PUBLIC inference_engine ${INT target_include_directories(${TARGET_NAME} INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") +# Workaround to avoid warnings caused by a bug in avx512intrin.h of GCC 5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + if(WIN32) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) endif() diff --git a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp index da16de2cc51c21..71c23ced0b07d9 100644 ---
out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } - - void calculate_nv12_to_rgb(const uchar **srcY, const uchar *srcUV, uchar **dstRGBx, @@ -145,6 +131,226 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); } +template +void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr int half_nlanes = (v_uint8::nlanes / 2); + const int shift = (half_nlanes / 4); + + if (4 == lpi) { + GAPI_DbgAssert(inSz.width >= half_nlanes); + + v_uint8 shuf_mask1 = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 shuf_mask2 = v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14, + 1, 5, 9, 13, 3, 7, 11, 15, + 0, 4, 8, 12, 2, 6, 10, 14, + 1, 5, 9, 13, 3, 7, 11, 15); + + v_uint8 shuf_mask3 = v_setr_s8(0, 1, 8, 9, 2, 3, 10, 11, + 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, + 4, 5, 12, 13, 6, 7, 14, 15); + + // vertical pass + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_load_ccache_expand(&src0[0][w]); + v_int16 val0_1 = v_load_ccache_expand(&src0[1][w]); + v_int16 val0_2 = v_load_ccache_expand(&src0[2][w]); + v_int16 val0_3 = v_load_ccache_expand(&src0[3][w]); + + v_int16 val1_0 = v_load_ccache_expand(&src1[0][w]); + v_int16 val1_1 = v_load_ccache_expand(&src1[1][w]); + v_int16 val1_2 = v_load_ccache_expand(&src1[2][w]); + v_int16 val1_3 = v_load_ccache_expand(&src1[3][w]); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q0, q1); + v_uint8 q3 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q0, q1); + + v_uint8 q4 = v_shuffle_s8(q2, shuf_mask1); + v_uint8 q5 = v_shuffle_s8(q3, shuf_mask1); + + v_uint8 q6 = v256_permute2x128<0x20>(q4, q5); + v_uint8 q7 = v256_permute2x128<0x31>(q4, q5); + + vx_store(&tmp[4 * w + 0], q6); + vx_store(&tmp[4 * w + 2 * half_nlanes], q7); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + v_uint8 val_0, val_1, val_2, val_3; + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 4)]); + v_int16 a54 = vx_load(&clone[4 * (x + 8)]); + v_int16 a76 = vx_load(&clone[4 * (x + 12)]); + + for (int c = 0; c < chanNum; ++c) { + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + 
v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_shuffle_s8(q0, shuf_mask2); + v_uint8 q3 = v_shuffle_s8(q1, shuf_mask2); + + v_uint8 q4 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q2, q3); + v_uint8 q5 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q2, q3); + + v_uint8 q6 = v256_permute4x64<0xD8>(q4); + v_uint8 q7 = v256_permute4x64<0xD8>(q5); + + v_uint8 q8 = v_shuffle_s8(q6, shuf_mask3); + v_uint8 q9 = v_shuffle_s8(q7, shuf_mask3); + + v_store_low(&dst[c][0][x], q8); + v_store_high(&dst[c][1][x], q8); + v_store_low(&dst[c][2][x], q9); + v_store_high(&dst[c][3][x], q9); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } + } +} + +// Resize (bi-linear, 8UC3) +void calcRowLinear_8U(C3, std::array, 3> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 3; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + +// Resize (bi-linear, 8UC4) +void calcRowLinear_8U(C4, std::array, 4> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int 
lpi) { + constexpr const int chanNum = 4; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { copyRow_8U_impl(in, out, length); } diff --git a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp index 6b6e4cfd635b1d..5b900d52c5455f 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp @@ -4,10 +4,7 @@ #include #include -#include -#include "ie_preprocess_gapi_kernels.hpp" -#include "ie_preprocess_gapi_kernels_impl.hpp" #include "ie_preprocess_gapi_kernels_avx512.hpp" #include @@ -38,17 +35,6 @@ namespace gapi { namespace kernels { namespace avx512 { -//---------------------------------------------------------------------- - -static inline v_uint16x32 v_expand_low(const v_uint8x64& a) { - return v_uint16x32(_mm512_unpacklo_epi8(a.val, _mm512_setzero_si512())); -} - -static inline v_uint16x32 v_expand_high(const v_uint8x64& a) { - return v_uint16x32(_mm512_unpackhi_epi8(a.val, _mm512_setzero_si512())); -} - -//------------------------------------------------------------------------------ void mergeRow_8UC2(const uint8_t in0[], const uint8_t in1[], uint8_t out[], int length) { @@ -110,8 +96,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } - - void calculate_nv12_to_rgb(const uchar **srcY, const uchar *srcUV, uchar **dstRGBx, @@ -141,6 +125,278 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); } +// Resize (bi-linear, 8U, generic number of channels) +template +void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr int half_nlanes = (v_uint8::nlanes / 2); + const int shift = (half_nlanes / 4); + + if (4 == lpi) { + GAPI_DbgAssert(inSz.width >= half_nlanes); + + + v_uint8 shuf_mask1 = v_setr_s8(0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15); + + v_uint8 shuf_mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15); + + v_uint32 idx1 = v_set_s32(23, 21, 7, 5, 22, 20, 6, 4, 19, 17, 3, 1, 18, 16, 2, 0); + v_uint32 idx2 = v_set_s32(31, 29, 15, 13, 30, 28, 14, 12, 27, 25, 11, 9, 26, 24, 10, 8); + v_uint32 idx3 = v_set_s32(29, 25, 21, 17, 13, 9, 5, 1, 28, 24, 20, 16, 12, 8, 4, 0); + v_uint32 idx4 = v_set_s32(31, 27, 23, 19, 15, 11, 7, 3, 30, 26, 22, 18, 14, 10, 6, 2); + + // vertical pass + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes 
&& w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_load_ccache_expand(&src0[0][w]); + v_int16 val0_1 = v_load_ccache_expand(&src0[1][w]); + v_int16 val0_2 = v_load_ccache_expand(&src0[2][w]); + v_int16 val0_3 = v_load_ccache_expand(&src0[3][w]); + + v_int16 val1_0 = v_load_ccache_expand(&src1[0][w]); + v_int16 val1_1 = v_load_ccache_expand(&src1[1][w]); + v_int16 val1_2 = v_load_ccache_expand(&src1[2][w]); + v_int16 val1_3 = v_load_ccache_expand(&src1[3][w]); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); +#if 1 + v_uint8 q2 = v_permutex2_s32(q0, q1, idx1); + v_uint8 q3 = v_permutex2_s32(q0, q1, idx2); + + v_uint8 q4 = v_shuffle_s8(q2, shuf_mask1); + v_uint8 q5 = v_shuffle_s8(q3, shuf_mask1); + + // Second variant of decompose. It may be useful in the future. +#else + v_uint8 q2 = v_mblend_shiftleft(q0, q1); + v_uint8 q3 = v_mblend_shiftright(q0, q1); + + v_uint8 mask1 = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 q4 = v_shuffle_s8(q2, mask1); + v_uint8 q5 = v_shuffle_s8(q3, mask1); + + v_uint64 idx1 = v_set_s64(11, 10, 3, 2, 9, 8, 1, 0); + v_uint64 idx2 = v_set_s64(15, 14, 7, 6, 13, 12, 5, 4); + + v_uint8 q6 = v_permutex2_s64(q4, q5, idx1); + v_uint8 q7 = v_permutex2_s64(q4, q5, idx2); +#endif + + vx_store(&tmp[4 * w + 0], q4); + vx_store(&tmp[4 * w + 2 * half_nlanes], q5); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + v_uint8 val_0, val_1, val_2, val_3; + + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 8)]); + v_int16 a54 = vx_load(&clone[4 * (x + 16)]); + v_int16 a76 = vx_load(&clone[4 * (x + 24)]); + + for (int c = 0; c < chanNum; ++c) { + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16
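// [Editor's note, illustrative aside: v_permutex2_s32(a, b, idx) is assumed
// to wrap the AVX-512 two-source 32-bit permute (vpermt2d-style), where each
// output lane i takes element idx[i] from the concatenation of a and b,
// indices 0..15 selecting from a and 16..31 from b. The idx tables defined
// earlier use this to reorder the four packed rows into the layout that the
// shuffles and stores below expect.]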
r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_shuffle_s8(q0, shuf_mask1); + v_uint8 q3 = v_shuffle_s8(q1, shuf_mask1); +#if 1 + v_uint8 q4 = v_permutex2_s32(q2, q3, idx3); + v_uint8 q5 = v_permutex2_s32(q2, q3, idx4); + + v_uint8 q6 = v_shuffle_s8(q4, shuf_mask2); + v_uint8 q7 = v_shuffle_s8(q5, shuf_mask2); + + + // Second variant of decompose. It may be useful in the future. +#else + v_uint8 q4 = v_mask_blend_shiftleft<0xCCCCCCCC /*0b11001100110011001100110011001100*/, 4>(q2, q3); + v_uint8 q5 = v_mask_blend_shiftright<0xCCCCCCCC /*0b11001100110011001100110011001100*/, 4>(q2, q3); + + v_int32 idx = v_set_s32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0); + + v_uint8 q6 = v_permutex_s32(idx, q4); + v_uint8 q7 = v_permutex_s32(idx, q5); + + v_uint8 mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15); + + v_uint8 q8 = v_shuffle_s8(q6, mask2); + v_uint8 q9 = v_shuffle_s8(q7, mask2); +#endif + v_store_low(&dst[c][0][x], q6); + v_store_high(&dst[c][1][x], q6); + v_store_low(&dst[c][2][x], q7); + v_store_high(&dst[c][3][x], q7); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } + } +} + +// Resize (bi-linear, 8UC3) +void calcRowLinear_8U(C3, std::array, 3> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 3; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + +// Resize (bi-linear, 8UC4) +void calcRowLinear_8U(C4, std::array, 4> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 4; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { copyRow_8U_impl(in, out, length); } @@ -153,3 +409,4 @@
void copyRow_32F(const float in[], float out[], int length) { } // namespace kernels } // namespace gapi } // namespace InferenceEngine + diff --git a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp index cf121f4296e8f2..8b994d82b8e641 100644 --- a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp @@ -50,18 +50,6 @@ namespace InferenceEngine { namespace gapi { namespace kernels { -//---------------------------------------------------------------------- - -static inline v_uint16x8 v_expand_low(const v_uint8x16& a) { - return v_uint16x8(_mm_unpacklo_epi8(a.val, _mm_setzero_si128())); } - -static inline v_uint16x8 v_expand_high(const v_uint8x16& a) { - return v_uint16x8(_mm_unpackhi_epi8(a.val, _mm_setzero_si128())); } - -//------------------------------------------------------------------------------ - // Resize (bi-linear, 8U) void calcRowLinear_8U(uint8_t *dst[], const uint8_t *src0[], @@ -485,9 +473,12 @@ void calcRowLinear_8U(uint8_t *dst[], } } +// The universal-intrinsic 3C/4C resize implementation for SSE42 is sometimes a bit slower than the original one. +// Keep the original implementation until the cause is found, then remove it. +#if 1 // Resize (bi-linear, 8U, generic number of channels) template -void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, +void calcRowLinear_8UC_Impl_(std::array, chanNum> &dst, const uint8_t *src0[], const uint8_t *src1[], const short alpha[], @@ -498,9 +489,11 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, const Size &inSz, const Size &outSz, int lpi) { + const int half_nlanes = (v_uint8::nlanes / 2); + if (4 == lpi) { // vertical pass - GAPI_DbgAssert(inSz.width >= 8); + GAPI_DbgAssert(inSz.width >= half_nlanes); __m128i b0 = _mm_set1_epi16(beta[0]); __m128i b1 = _mm_set1_epi16(beta[1]); @@ -508,7 +501,7 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, __m128i b3 = _mm_set1_epi16(beta[3]); for (int w = 0; w < inSz.width*chanNum; ) { - for (; w <= inSz.width*chanNum - 8 && w >= 0; w += 8) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { //-------------------------------------------- // reworked from: ie_preprocess_data_sse42.cpp // function: resize_bilinear_u8 @@ -558,14 +551,14 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (w < inSz.width*chanNum) { - w = inSz.width*chanNum - 8; + w = inSz.width*chanNum - half_nlanes; } } // horizontal pass - GAPI_DbgAssert(outSz.width >= 8); + GAPI_DbgAssert(outSz.width >= half_nlanes); for (int x = 0; x < outSz.width; ) { - for (; x <= outSz.width - 8 && x >= 0; x += 8) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { //-------------------------------------------- // reworked from: ie_preprocess_data_sse42.cpp // function: resize_bilinear_u8 @@ -645,17 +638,18 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (x < outSz.width) { - x = outSz.width - 8; + x = outSz.width - half_nlanes; } } + } else { // if any lpi for (int l = 0; l < lpi; l++) { short beta0 = beta[l]; // vertical pass - GAPI_DbgAssert(inSz.width*chanNum >= 8); + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); for (int w = 0; w < inSz.width*chanNum; ) { - for (; w <= inSz.width*chanNum - 8; w += 8) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { v_int16x8 s0 =
v_reinterpret_as_s16(v_load_expand(&src0[l][w])); v_int16x8 s1 = v_reinterpret_as_s16(v_load_expand(&src1[l][w])); v_int16x8 t = v_mulhrs(s0 - s1, beta0) + s1; @@ -663,14 +657,14 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (w < inSz.width*chanNum) { - w = inSz.width*chanNum - 8; + w = inSz.width*chanNum - half_nlanes; } } // horizontal pass - GAPI_DbgAssert(outSz.width >= 8); + GAPI_DbgAssert(outSz.width >= half_nlanes); for (int x = 0; x < outSz.width; ) { - for (; x <= outSz.width - 8 && x >= 0; x += 8) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { for (int c = 0; c < chanNum; c++) { v_int16x8 a0 = v_load(&alpha[x]); // as signed Q1.1.14 v_int16x8 sx = v_load(&mapsx[x]); // as integer (int16) @@ -682,12 +676,186 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (x < outSz.width) { - x = outSz.width - 8; + x = outSz.width - half_nlanes; + } + } + } + } +} +#else +// The universal-intrinsic 3C/4C resize implementation for SSE42 is sometimes a bit slower. +// Enable this variant once the cause is found. +template +void calcRowLinear_8UC_Impl_(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + const int half_nlanes = (v_uint8::nlanes / 2); + + if (4 == lpi) { + // vertical pass + GAPI_DbgAssert(inSz.width >= half_nlanes); + + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_reinterpret_as_s16(vx_load_expand(&src0[0][w])); + v_int16 val0_1 = v_reinterpret_as_s16(vx_load_expand(&src0[1][w])); + v_int16 val0_2 = v_reinterpret_as_s16(vx_load_expand(&src0[2][w])); + v_int16 val0_3 = v_reinterpret_as_s16(vx_load_expand(&src0[3][w])); + + v_int16 val1_0 = v_reinterpret_as_s16(vx_load_expand(&src1[0][w])); + v_int16 val1_1 = v_reinterpret_as_s16(vx_load_expand(&src1[1][w])); + v_int16 val1_2 = v_reinterpret_as_s16(vx_load_expand(&src1[2][w])); + v_int16 val1_3 = v_reinterpret_as_s16(vx_load_expand(&src1[3][w])); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q0, q1); + v_uint8 q3 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q0, q1); + + v_uint8 mask = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 q4 = v_shuffle_s8(q2, mask); + v_uint8 q5 = v_shuffle_s8(q3, mask); + + vx_store(&tmp[4 * w + 0], q4); + vx_store(&tmp[4 * w + 2 * half_nlanes], q5); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 2)]); + v_int16 a54
= vx_load(&clone[4 * (x + 4)]); + v_int16 a76 = vx_load(&clone[4 * (x + 6)]); + + v_uint8 val_0 = vx_setzero_u8(); + v_uint8 val_1 = vx_setzero_u8(); + v_uint8 val_2 = vx_setzero_u8(); + v_uint8 val_3 = vx_setzero_u8(); + + for (int c = 0; c < chanNum; ++c) { + int shift = (half_nlanes / 4); + + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 mask = v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15); + + v_uint8 q2 = v_shuffle_s8(q0, mask); + v_uint8 q3 = v_shuffle_s8(q1, mask); + + v_uint8 q4 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q2, q3); + v_uint8 q5 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q2, q3); + + v_store_low(&dst[c][0][x], q4); + v_store_high(&dst[c][1][x], q4); + v_store_low(&dst[c][2][x], q5); + v_store_high(&dst[c][3][x], q5); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; } } } } } +#endif // Resize (bi-linear, 8UC3) void calcRowLinear_8U(C3, std::array, 3> &dst, @@ -703,7 +871,7 @@ void calcRowLinear_8U(C3, std::array, 3> &dst, int lpi) { constexpr const int chanNum = 3; - calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); + calcRowLinear_8UC_Impl_(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } // 
Resize (bi-linear, 8UC4) @@ -719,7 +887,7 @@ void calcRowLinear_8U(C4, std::array, 4> &dst, const Size &outSz, int lpi) { constexpr const int chanNum = 4; - calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); + calcRowLinear_8UC_Impl_(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } // Resize (bi-linear, 32F) diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp index 667e9d6be18410..2272ba5970645f 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp @@ -894,24 +894,62 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, } } - #ifdef HAVE_SSE +#ifdef HAVE_AVX512 + if (with_cpu_x86_avx512_core()) { + if (std::is_same::value) { + if (inSz.width >= 64 && outSz.width >= 32) { + avx512::calcRowLinear_8UC(dst, + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); + return; + } + } + } +#endif + +#ifdef HAVE_AVX2 + if (with_cpu_x86_avx2()) { + if (std::is_same::value) { + if (inSz.width >= 32 && outSz.width >= 16) { + avx::calcRowLinear_8UC(dst, + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); + return; + } + } + } +#endif + +#ifdef HAVE_SSE if (with_cpu_x86_sse42()) { if (std::is_same::value) { if (inSz.width >= 16 && outSz.width >= 8) { calcRowLinear_8UC(dst, - reinterpret_cast(src0), - reinterpret_cast(src1), - reinterpret_cast(alpha), - reinterpret_cast(clone), - reinterpret_cast(mapsx), - reinterpret_cast(beta), - reinterpret_cast(tmp), - inSz, outSz, lpi); + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); return; } } } - #endif // HAVE_SSE +#endif // HAVE_SSE auto length = out[0].get().length(); diff --git a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt index 94b935f6918391..36b3d9a089ad08 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt +++ b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt @@ -7,6 +7,12 @@ file(GLOB HDR *.hpp) add_library(fluid_test_computations SHARED ${SRC} ${HDR}) +# Workaround to avoid warnings caused with bug in the avx512intrin.h of GCC5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(fluid_test_computations PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + target_include_directories(fluid_test_computations PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(fluid_test_computations PRIVATE inference_engine_preproc_s inference_engine fluid) diff --git a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp index eb592b1212d049..046f604d57654a 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp +++ 
b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp @@ -48,7 +48,7 @@ inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b) { return _mm256_permute2f128_pd(a, b, imm); } template -inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) +static inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) { return _Tpvec(_v256_permute2x128(a.val, b.val)); } template @@ -60,7 +60,7 @@ inline __m256d _v256_permute4x64(const __m256d& a) { return _mm256_permute4x64_pd(a, imm); } template -inline _Tpvec v256_permute4x64(const _Tpvec& a) +static inline _Tpvec v256_permute4x64(const _Tpvec& a) { return _Tpvec(_v256_permute4x64(a.val)); } inline __m128i _v256_extract_high(const __m256i& v) @@ -730,6 +730,11 @@ OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16, v_int32x8, short, _mm256_cvtepi16_e OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8, v_uint64x4, unsigned, _mm256_cvtepu32_epi64) OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8, v_int64x4, int, _mm256_cvtepi32_epi64) +static inline v_int16x16 v_load_ccache_expand(const uchar* ptr) +{ + return v_int16x16(_mm256_cvtepu8_epi16(_mm_lddqu_si128((const __m128i*)ptr))); +} + inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b) { __m256i ad = _mm256_srai_epi16(a.val, 8); @@ -1925,6 +1930,37 @@ inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b) return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a.val, b.val))); } +static inline v_uint8x32 v_packus(const v_int16x16& a, const v_int16x16& b) +{ + return v_uint8x32(_mm256_packus_epi16(a.val, b.val)); +} + +template +static inline v_uint8x32 v_blend_shiftleft(const v_uint8x32& a, const v_uint8x32& b) +{ + return v_uint8x32(_mm256_blend_epi16(a.val, _mm256_slli_si256(b.val, shift), mask)); +} + +template +static inline v_uint8x32 v_blend_shiftright(const v_uint8x32& a, const v_uint8x32& b) +{ + return v_uint8x32(_mm256_blend_epi16(_mm256_srli_si256(a.val, shift), b.val, mask)); +} + +static inline v_uint8x32 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15, char b16, char b17, char b18, char b19, + char b20, char b21, char b22, char b23, char b24, + char b25, char b26, char b27, char b28, char b29, + char b30, char b31) +{ + return v_uint8x32(_mm256_setr_epi8(b0, b1, b2, b3, b4, b5, b6, b7, + b8, b9, b10, b11, b12, b13, b14, b15, + b16, b17, b18, b19, b20, b21, b22, b23, + b24, b25, b26, b27, b28, b29, b30, b31)); +} + inline void v_pack_store(schar* ptr, const v_int16x16& a) { v_store_low(ptr, v_pack(a, a)); } @@ -3075,9 +3111,7 @@ static inline v_uint16x16 v_mulhi(const v_uint16x16& a, uint16_t b) static inline v_int16x16 v_mulhrs(const v_int16x16& a, const v_int16x16& b) { - v_int16x16 r; - r.val = _mm256_mulhrs_epi16(a.val, b.val); - return r; + return v_int16x16(_mm256_mulhrs_epi16(a.val, b.val)); } static inline v_int16x16 v_mulhrs(const v_int16x16& a, short b) @@ -3110,6 +3144,49 @@ static inline v_float32x8 operator* (const v_float32x8& a, float b) return a * v256_setall_f32(b); } +static inline v_uint8x32 v_shuffle_s8(const v_uint8x32& a, const v_uint8x32& mask) +{ + return v_uint8x32(_mm256_shuffle_epi8(a.val, mask.val)); +} + +static inline void v_gather_channel(v_uint8x32& vec, const uint8_t tmp[], const short mapsx[], + int chanNum, int c, int x, int shift) +{ + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * 
(chanNum * mapsx[x + shift + 1] + c)]), 1); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 2] + c)]), 2); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 3] + c)]), 3); + + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 4); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 5); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 2] + 1) + c)]), 6); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 3] + 1) + c)]), 7); +} + +namespace { + template + static inline v_int16x16 v_gather_chan(const uchar src[], const v_int16x16& index, int channel, int pos) { + v_int16x16 r; + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 0) + pos) + channel]), 0); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 1) + pos) + channel]), 1); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 2) + pos) + channel]), 2); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 3) + pos) + channel]), 3); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 4) + pos) + channel]), 4); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 5) + pos) + channel]), 5); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 6) + pos) + channel]), 6); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 7) + pos) + channel]), 7); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 8) + pos) + channel]), 8); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 9) + pos) + channel]), 9); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 10) + pos) + channel]), 10); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 11) + pos) + channel]), 11); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 12) + pos) + channel]), 12); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 13) + pos) + channel]), 13); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 14) + pos) + channel]), 14); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 15) + pos) + channel]), 15); + return r; + } +} // namespace + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp b/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp index 2f88c191651259..1f786b7bebb2ea 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp @@ -89,7 +89,7 @@ inline __m256 _v512_extract_high(const __m512& v) { return _mm512_extractf32x8_ps(v, 1); } inline __m256d _v512_extract_high(const __m512d& v) -{ return _mm512_extractf64x4_pd(v, 1); } +{ return _mm512_mask_extractf64x4_pd(_mm256_setzero_pd(), (__mmask8) -1, v, 1); } inline __m256i _v512_extract_low(const __m512i& v) { return _mm512_castsi512_si256(v); } @@ -1936,7 +1936,7 @@ OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16, schar, _mm512_cvtepi8_epi32) /* pack */ // 16 inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) -{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } +{ return v_int8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8)-1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) { @@ -1946,7 +1946,7 @@ inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) { - return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); + return v_uint8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8)-1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); } inline void v_pack_store(schar* ptr, const v_int16x32& a) @@ -2007,7 +2007,9 @@ void v_rshr_pack_store(schar* ptr, const v_int16x32& a) // 32 inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) -{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); } +{ return v_int16x32(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, + _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), + _mm512_packs_epi32(a.val, b.val))); } inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) { @@ -2016,7 +2018,9 @@ inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) } inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) -{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); } +{ return v_uint16x32(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, + _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), + _mm512_packus_epi32(a.val, b.val))); } inline void v_pack_store(short* ptr, const v_int32x16& a) { v_store_low(ptr, v_pack(a, a)); } @@ -2118,7 +2122,7 @@ void v_rshr_pack_store(int* ptr, const v_int64x8& a) // pack boolean inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) -{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } +{ return v_uint8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c, const v_uint32x16& d) @@ -3069,9 +3073,7 @@ static inline v_uint16x32 v_mulhi(const v_uint16x32& a, uint16_t b) static inline v_int16x32 v_mulhrs(const v_int16x32& a, const v_int16x32& b) { - v_int16x32 
r; - r.val = _mm512_mulhrs_epi16(a.val, b.val); - return r; + return v_int16x32(_mm512_mulhrs_epi16(a.val, b.val)); } static inline v_int16x32 v_mulhrs(const v_int16x32& a, short b) @@ -3104,6 +3106,188 @@ static inline v_float32x16 operator* (const v_float32x16& a, float b) return a * v512_setall_f32(b); } +template +static inline v_uint8x64 v_mask_blend_shiftleft(const v_uint8x64& a, const v_uint8x64& b) +{ + return v_uint8x64(_mm512_mask_blend_epi16(mask, + a.val, _mm512_bslli_epi128(b.val, shift))); +} + +template +static inline v_uint8x64 v_mask_blend_shiftright(const v_uint8x64& a, const v_uint8x64& b) +{ + return v_uint8x64(_mm512_mask_blend_epi16(mask, + _mm512_bsrli_epi128(a.val, shift), b.val)); +} + +static inline v_uint8x64 v_packus(const v_int16x32& a, const v_int16x32& b) +{ + return v_uint8x64(_mm512_packus_epi16(a.val, b.val)); +} + + +#define word(b0, b1, b2, b3) \ + (((uint32_t)((uint8_t)(b0)) << 0*8) \ + | ((uint32_t)((uint8_t)(b1)) << 1*8) \ + | ((uint32_t)((uint8_t)(b2)) << 2*8) \ + | ((uint32_t)((uint8_t)(b3)) << 3*8)) + +static inline v_uint8x64 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15, char b16, char b17, char b18, char b19, + char b20, char b21, char b22, char b23, char b24, + char b25, char b26, char b27, char b28, char b29, + char b30, char b31, char b32, char b33, char b34, + char b35, char b36, char b37, char b38, char b39, + char b40, char b41, char b42, char b43, char b44, + char b45, char b46, char b47, char b48, char b49, + char b50, char b51, char b52, char b53, char b54, + char b55, char b56, char b57, char b58, char b59, + char b60, char b61, char b62, char b63) +{ + return v_uint8x64(_mm512_setr_epi32(word(b0, b1, b2, b3), word(b4, b5, b6, b7), word(b8, b9, b10, b11), + word(b12, b13, b14, b15), word(b16, b17, b18, b19), word(b20, b21, b22, b23), + word(b24, b25, b26, b27), word(b28, b29, b30, b31), word(b32, b33, b34, b35), + word(b36, b37, b38, b39), word(b40, b41, b42, b43), word(b44, b45, b46, b47), + word(b48, b49, b50, b51), word(b52, b53, b54, b55), word(b56, b57, b58, b59), + word(b60, b61, b62, b63))); +} + +static inline v_uint64x8 v_set_s64(int b7, int b6, int b5, int b4, int b3, int b2, int b1, int b0) +{ + return v_uint64x8(_mm512_set_epi64(b7, b6, b5, b4, b3, b2, b1, b0)); +} + +static inline v_uint32x16 v_set_s32(int b15, int b14, int b13, int b12, int b11, int b10, int b9, int b8, + int b7, int b6, int b5, int b4, int b3, int b2, int b1, int b0) +{ + return v_uint32x16(_mm512_set_epi32(b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0)); +} + +static inline v_uint8x64 v_shuffle_s8(const v_uint8x64& a, const v_uint8x64& mask) +{ + return v_uint8x64(_mm512_shuffle_epi8(a.val, mask.val)); +} +static inline v_int16x32 v_load_ccache_expand(const uchar* ptr) +{ + return v_int16x32(_mm512_cvtepu8_epi16(_mm256_lddqu_si256((const __m256i*)ptr))); \ +} +static inline __m512i v512_insert_epi16(__m512i target, const uchar x, const int index) +{ + return _mm512_mask_set1_epi16(target, 1UL << index, x); +} +static inline __m512i v512_insert_epi32(__m512i target, const int32_t x, const int index) +{ + return _mm512_mask_set1_epi32(target, 1UL << index, x); +} + +static inline void v_gather_channel(v_uint8x64& vec, const uint8_t tmp[], const short mapsx[], + int chanNum, int c, int x, int shift) +{ + __m256i vec1 = _mm256_setzero_si256(); + __m256i vec2 = _mm256_setzero_si256(); + + vec1 = 
_mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 1] + c)]), 1); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 2] + c)]), 2); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 3] + c)]), 3); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 4] + c)]), 4); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 5] + c)]), 5); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 6] + c)]), 6); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 7] + c)]), 7); + + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 0); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 1); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 2] + 1) + c)]), 2); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 3] + 1) + c)]), 3); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 4] + 1) + c)]), 4); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 5] + 1) + c)]), 5); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 6] + 1) + c)]), 6); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 7] + 1) + c)]), 7); + + vec.val = _mm512_inserti32x8(_mm512_castsi256_si512(vec1), vec2, 1); +} + +static inline v_uint8x64 v_permutex2_s64(const v_uint8x64& a, const v_uint8x64& b, const v_uint64x8& idxs) +{ + return v_uint8x64(_mm512_permutex2var_epi64(a.val, idxs.val, b.val)); +} + +static inline v_uint8x64 v_permutex_s32(const v_uint8x64& a, const v_uint64x8 idxs) +{ + return v_uint8x64(_mm512_permutexvar_epi32(idxs.val, a.val)); +} + +static inline v_uint8x64 v_permutex2_s32(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16 idxs) +{ + return v_uint8x64(_mm512_permutex2var_epi32(a.val, idxs.val, b.val)); +} + +#if defined(__GNUC__) + +int _mm512_cvtsi512_si32(__m512i a) +{ + __v16si b = (__v16si)a; + return b[0]; +} + +#endif + +template +static inline int v512_extract_epi32(__m512i target) +{ + return _mm512_cvtsi512_si32(_mm512_mask_alignr_epi32(_mm512_setzero_si512(), (__mmask16)-1, target, target, index)); +} + +template +static inline int v512_extract_epi16(__m512i target) +{ + return (v512_extract_epi32(target) >> (index % 2 ? 
16 : 0)) & 0xFFFF; +} + +namespace { + template + static inline v_int16x32 v_gather_chan(const uchar src[], const v_int16x32& index, int channel, int pos) { + v_int16x32 r; + + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<0>(index.val) + pos) + channel]), 0); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<1>(index.val) + pos) + channel]), 1); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<2>(index.val) + pos) + channel]), 2); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<3>(index.val) + pos) + channel]), 3); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<4>(index.val) + pos) + channel]), 4); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<5>(index.val) + pos) + channel]), 5); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<6>(index.val) + pos) + channel]), 6); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<7>(index.val) + pos) + channel]), 7); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<8>(index.val) + pos) + channel]), 8); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<9>(index.val) + pos) + channel]), 9); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<10>(index.val) + pos) + channel]), 10); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<11>(index.val) + pos) + channel]), 11); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<12>(index.val) + pos) + channel]), 12); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<13>(index.val) + pos) + channel]), 13); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<14>(index.val) + pos) + channel]), 14); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<15>(index.val) + pos) + channel]), 15); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<16>(index.val) + pos) + channel]), 16); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<17>(index.val) + pos) + channel]), 17); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<18>(index.val) + pos) + channel]), 18); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<19>(index.val) + pos) + channel]), 19); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<20>(index.val) + pos) + channel]), 20); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<21>(index.val) + pos) + channel]), 21); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<22>(index.val) + pos) + channel]), 22); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<23>(index.val) + pos) + channel]), 23); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<24>(index.val) + pos) + channel]), 24); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<25>(index.val) + pos) + channel]), 25); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<26>(index.val) + pos) + channel]), 
26); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<27>(index.val) + pos) + channel]), 27); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<28>(index.val) + pos) + channel]), 28); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<29>(index.val) + pos) + channel]), 29); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<30>(index.val) + pos) + channel]), 30); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<31>(index.val) + pos) + channel]), 31); + + return r; + } +} // namespace + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp b/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp index cfeb2962d35f76..1e75ee715204a8 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp @@ -371,6 +371,12 @@ inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); } //////////////// PACK /////////////// +static inline v_uint8x16 v_packus(const v_int16x8& a, const v_int16x8& b) { + v_uint8x16 res; + res.val = _mm_packus_epi16(a.val, b.val); + return res; +} + inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) { __m128i delta = _mm_set1_epi16(255); @@ -1526,7 +1532,17 @@ inline _Tpwsvec v_load_expand(const _Tps* ptr) \ { \ __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \ -} +}\ +inline _Tpwuvec v_expand_low(const _Tpuvec& a) { \ + _Tpwuvec res; \ + res.val = _mm_cvtepu8_epi16(a.val); \ + return res; \ +} \ +inline _Tpwuvec v_expand_high(const _Tpuvec& a) { \ + _Tpwuvec res; \ + res.val = _mm_unpackhi_epi8(a.val, _mm_setzero_si128()); \ + return res; \ +} \ OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8) OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16) @@ -2921,6 +2937,12 @@ static inline v_int16x8 v_saturate_s16(const v_int32x4& a) { return r; } +static inline v_uint8x16 v_packus_s16(const v_int16x8& a, const v_int16x8& b) { + v_uint8x16 r; + r.val = _mm_packus_epi16(a.val, b.val); + return r; +} + // for each j=index[k], load two chars src[j] and src[j+1] static inline v_uint8x16 v_gather_pairs(const uchar src[], const v_int16x8& index) { v_uint8x16 r; @@ -3030,6 +3052,47 @@ static inline v_float32x4 operator* (const v_float32x4& a, float b) { return a * v_setall_f32(b); } +template +static inline v_uint8x16 v_blend_shiftleft(const v_uint8x16& a, const v_uint8x16& b) { + v_uint8x16 res; + res.val = _mm_blend_epi16(a.val, _mm_slli_si128(b.val, shift), mask /*0xCC 0b11001100*/); + return res; +} + +template +static inline v_uint8x16 v_blend_shiftright(const v_uint8x16& a, const v_uint8x16& b) { + v_uint8x16 res; + res.val = _mm_blend_epi16(_mm_srli_si128(a.val, shift), b.val, mask /*0xCC 0b11001100*/); + return res; +} + +static inline v_uint8x16 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15) { + v_uint8x16 res; + res.val = _mm_setr_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, + b9, b10, b11, b12, b13, b14, b15); + return res; +} + + +static inline 
v_uint8x16 v_shuffle_s8(const v_uint8x16& a, const v_uint8x16& mask) {
+    v_uint8x16 res;
+    res.val = _mm_shuffle_epi8(a.val, mask.val);
+    return res;
+}
+
+static inline void v_gather_channel(v_uint8x16& vec, const uint8_t tmp[], const short mapsx[],
+                                    int chanNum, int c, int x, int shift)
+{
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0);
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + shift + 1] + c)]), 1);
+
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 2);
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 3);
+}
+
 //! @}

 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

From 11bd4f8a4274234936d493f42cce9fd595c73939 Mon Sep 17 00:00:00 2001
From: Ilya Churaev
Date: Fri, 29 May 2020 17:46:40 +0300
Subject: [PATCH 16/24] Do not use ONNX reader if ONNX importer was disabled
 (#683)

---
 inference-engine/src/inference_engine/CMakeLists.txt        | 5 +++++
 inference-engine/src/inference_engine/ie_network_reader.cpp | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index ce94bb3567fea8..4ae0d560700aa1 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -119,6 +119,11 @@ add_library(${TARGET_NAME}_obj OBJECT

 target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)

+# TODO: Remove these definitions once readers are loaded from XML
+if(NGRAPH_ONNX_IMPORT_ENABLE)
+    target_compile_definitions(${TARGET_NAME}_obj PRIVATE ONNX_IMPORT_ENABLE)
+endif()
+
 target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $)

diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp
index 9d739b6afb3c46..eabfb3dbbbc6b0 100644
--- a/inference-engine/src/inference_engine/ie_network_reader.cpp
+++ b/inference-engine/src/inference_engine/ie_network_reader.cpp
@@ -103,9 +103,11 @@ void registerReaders() {
     std::lock_guard<std::mutex> lock(readerMutex);
     if (initialized) return;
     // TODO: Read readers info from XML
+#ifdef ONNX_IMPORT_ENABLE
     auto onnxReader = std::make_shared<Reader>("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX));
     readers.emplace("onnx", onnxReader);
     readers.emplace("prototxt", onnxReader);
+#endif
     auto irReader = std::make_shared<Reader>("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX));
     readers.emplace("xml", irReader);
     initialized = true;

From 6cfa77223e5cdca39ce04acca4635f2d7d7d1cd0 Mon Sep 17 00:00:00 2001
From: Evgenya Stepyreva
Date: Fri, 29 May 2020 19:09:01 +0300
Subject: [PATCH 17/24] [ nG ] Added F16 folding support (#686)

---
 ngraph/src/ngraph/op/transpose.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ngraph/src/ngraph/op/transpose.cpp b/ngraph/src/ngraph/op/transpose.cpp
index 758bea5fbfaa5f..d95d2dad4cb994 100644
--- a/ngraph/src/ngraph/op/transpose.cpp
+++ b/ngraph/src/ngraph/op/transpose.cpp
@@ -181,6 +181,8 @@ namespace
             break;
             TYPE_CASE(bf16)(arg1, arg2, out);
             break;
+            TYPE_CASE(f16)(arg1, arg2, out);
+            break;
             TYPE_CASE(f32)(arg1, arg2, out);
             break;
             TYPE_CASE(f64)(arg1, arg2, out);

From f7052a107d24c2e72ef8fe2d25d8d1f56fe72e71 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Fri, 29 May 2020 20:10:30 +0300
Subject: [PATCH 18/24] [IE 
CLDNN] Optimized FQ kernel in fsv16 layout (#573) - Optimized FQ kernel in fsv16 layout. Enabled scaleshift transform for FP16 precision - Disabled activation_opt kernel with fused ops in some cases --- .../activation/activation_kernel_opt.cpp | 6 +- .../quantize/quantize_kernel_base.cpp | 8 ++- .../quantize/quantize_kernel_base.h | 6 +- .../quantize/quantize_kernel_params.h | 39 +++++++++++- .../quantize/quantize_kernel_ref.cpp | 31 +++++++--- .../quantize/quantize_kernel_ref.h | 2 +- .../quantize_kernel_scale_shift_opt.cpp | 61 +++++++++++++++---- .../quantize_kernel_scale_shift_opt.h | 2 +- .../core/cl_kernels/quantize_gpu_ref.cl | 12 +++- .../quantize_gpu_scale_shift_opt.cl | 58 ++++++++++++++++-- .../thirdparty/clDNN/src/gpu/quantize_gpu.cpp | 21 ++++++- .../graph_optimizer/prepare_quantization.cpp | 3 - .../clDNN/src/include/quantize_inst.h | 19 +++++- .../tests/test_cases/fusings_gpu_test.cpp | 61 ++++++++++++------- 14 files changed, 263 insertions(+), 66 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp index f48b0e275fe3e1..2830d4066e151f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp @@ -76,11 +76,13 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co return false; } + if (params.output.GetLayout() != params.inputs[0].GetLayout()) return false; - if (!params.fused_ops.empty() && params.output.GetLayout() != DataLayout::bfyx && - params.output.GetLayout() != DataLayout::bfzyx) + if (!params.fused_ops.empty() && + ((params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx) || + ((params.output.X().v * params.output.Y().v) % 4 != 0))) return false; return true; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp index 92ce82357130d8..e9c1b39c8dfe44 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp @@ -33,7 +33,7 @@ bool QuantizeKernelBase::Validate(const Params& p, const optional_params&) const return true; } -JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params) const { +JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { JitConstants jit = MakeBaseParamsJitConstants(params); if (params.packed_binary_output) { @@ -55,6 +55,10 @@ JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params) jit.AddConstant(MakeJitConstant("LEVELS", static_cast(params.levels))); + jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0)); + jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1)); + jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2)); + return jit; } @@ -70,7 +74,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio auto runInfo = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = 
GetJitConstants(newParams); + auto cldnn_jit = GetJitConstants(newParams, runInfo); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h index 960fd4a7b009df..480e786ab847bf 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h @@ -25,9 +25,11 @@ class QuantizeKernelBase : public common_kernel_base { using common_kernel_base::common_kernel_base; virtual ~QuantizeKernelBase() {} - virtual JitConstants GetJitConstants(const quantize_params& params) const; - virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0; bool Validate(const Params& p, const optional_params& o) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + +protected: + virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const; + virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h index 388c0d6fb56b58..58ae821da63325 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h @@ -22,12 +22,47 @@ namespace kernel_selector { // quantize_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct quantize_params : public base_params { - quantize_params() : base_params(KernelType::QUANTIZE), - levels(0), packed_binary_output(false), scale_shift_opt(false) {} + quantize_params() + : base_params(KernelType::QUANTIZE) + , levels(0) + , packed_binary_output(false) + , scale_shift_opt(false) + , has_post_scale(true) + , has_post_shift(true) + , has_pre_shift(true) + , has_clamp(true) + , per_tensor_input_range(false) + , per_tensor_input_scale(false) + , per_tensor_input_shift(false) + , per_tensor_output_scale(false) + , per_tensor_output_shift(false) + , in_lo(0.0f) + , in_hi(0.0f) + , in_scale(0.0f) + , in_shift(0.0f) + , out_scale(0.0f) + , out_shift(0.0f) { } int levels; bool packed_binary_output; bool scale_shift_opt; + bool has_post_scale; + bool has_post_shift; + bool has_pre_shift; + bool has_clamp; + + bool per_tensor_input_range; + bool per_tensor_input_scale; + bool per_tensor_input_shift; + bool per_tensor_output_scale; + bool per_tensor_output_shift; + + float in_lo; + float in_hi; + float in_scale; + float in_shift; + float out_scale; + float out_shift; virtual ParamsKey GetParamsKey() const { auto k = base_params::GetParamsKey(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp index e1189153192a11..27fe85f5af4b4d 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp @@ -18,6 +18,8 @@ #include "kernel_selector_utils.h" #include +static const size_t sub_group_size = 32; + namespace kernel_selector { ParamsKey QuantizeKernelRef::GetSupportedKey() const { ParamsKey k; @@ -43,21 +45,34 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params, auto output = params.output; - runInfo.gws0 = output.Batch().v; - runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v; - runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16); + if (output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + runInfo.gws0 = output.Batch().v; + runInfo.gws1 = Align(output.Feature().v, sub_group_size); + runInfo.gws2 = output.Y().v * output.X().v * output.Z().v; + + runInfo.lws0 = 1; + runInfo.lws1 = sub_group_size; + runInfo.lws2 = 1; + } else { + runInfo.gws0 = output.Batch().v; + runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v; + runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16); - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + runInfo.lws0 = 1; + runInfo.lws1 = 1; + runInfo.lws2 = 16; + } runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; return runInfo; } -JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params) const { - JitConstants jit = Parent::GetJitConstants(params); +JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { + JitConstants jit = Parent::GetJitConstants(params, runInfo); + if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); + } return jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h index ca4287b3b19fae..f0263b231cb6ba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h @@ -26,7 +26,7 @@ class QuantizeKernelRef : public QuantizeKernelBase { QuantizeKernelRef() : QuantizeKernelBase("quantize_gpu_ref") {} virtual ~QuantizeKernelRef() {} - JitConstants GetJitConstants(const quantize_params& params) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp index 49f87576ecbd0a..679d663ecedff0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp @@ -18,6 +18,8 @@ #include "kernel_selector_utils.h" #include +static const size_t sub_group_size = 32; + namespace kernel_selector { ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const { ParamsKey k; @@ -60,27 +62,60 @@ CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& p auto output = params.output; - auto global = GetTensorFriendlyWorkGroups(output); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + if (output.GetLayout() == DataLayout::b_fs_yx_fsv16) { + runInfo.gws0 = output.Y().v * output.X().v; + runInfo.gws1 = Align(output.Feature().v, sub_group_size); + runInfo.gws2 = output.Batch().v; + + runInfo.lws0 = 1; + runInfo.lws1 = sub_group_size; + runInfo.lws2 = 1; + } else { + auto global = GetTensorFriendlyWorkGroups(output); + auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; + runInfo.gws0 = global[0]; + runInfo.gws1 = global[1]; + runInfo.gws2 = global[2]; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; + runInfo.lws0 = local[0]; + runInfo.lws1 = local[1]; + runInfo.lws2 = local[2]; + } runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; return runInfo; } -JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params) const { - JitConstants jit = Parent::GetJitConstants(params); - - auto tensor_jits = GetTensorFriendlyWorkGroupsJit(params.output); - jit.Merge(tensor_jits); +JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { + JitConstants jit = Parent::GetJitConstants(params, runInfo); + + if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16) { + jit.AddConstant(MakeJitConstant("GWS_BATCH", 2)); + jit.AddConstant(MakeJitConstant("GWS_FEATURE", 1)); + jit.AddConstant(MakeJitConstant("GWS_YX", 0)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); + } else { + auto tensor_jits = GetTensorFriendlyWorkGroupsJit(params.output); + jit.Merge(tensor_jits); + } + + jit.AddConstant(MakeJitConstant("HAS_POST_SCALE", params.has_post_scale)); + jit.AddConstant(MakeJitConstant("HAS_POST_SHIFT", params.has_post_shift)); + jit.AddConstant(MakeJitConstant("HAS_PRE_SHIFT", params.has_pre_shift)); + jit.AddConstant(MakeJitConstant("HAS_CLAMP", params.has_clamp)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_RANGE", params.per_tensor_input_range)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SCALE", params.per_tensor_input_scale)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SHIFT", params.per_tensor_input_shift)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SCALE", params.per_tensor_output_scale)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SHIFT", params.per_tensor_output_shift)); + jit.AddConstant(MakeJitConstant("IN_LO_VAL", params.in_lo)); + jit.AddConstant(MakeJitConstant("IN_HI_VAL", params.in_hi)); + jit.AddConstant(MakeJitConstant("IN_SCALE_VAL", params.in_scale)); + jit.AddConstant(MakeJitConstant("IN_SHIFT_VAL", params.in_shift)); + jit.AddConstant(MakeJitConstant("OUT_SCALE_VAL", params.out_scale)); + jit.AddConstant(MakeJitConstant("OUT_SHIFT_VAL", params.out_shift)); return jit; } diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h index d1c3fc8d32e041..d88dfb32f66544 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h @@ -26,7 +26,7 @@ class QuantizeKernelScaleShift : public QuantizeKernelBase { QuantizeKernelScaleShift() : QuantizeKernelBase("quantize_gpu_scale_shift_opt") {} virtual ~QuantizeKernelScaleShift() {} - JitConstants GetJitConstants(const quantize_params& params) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl index 6599e07ac9f445..f0fe9c908b470c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl @@ -16,7 +16,10 @@ #include "include/data_types.cl" #include "include/fetch.cl" -__attribute__((intel_reqd_sub_group_size(16))) +#ifdef SUB_GROUP_SIZE +__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) +#endif +__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) KERNEL(quantize_ref)(const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* input_low, const __global INPUT2_TYPE* input_high, @@ -110,10 +113,15 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input, const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, y, x); #endif - INPUT0_TYPE val = input[input_offset]; + +#if OUTPUT_LAYOUT_B_FS_YX_FSV16 + if (of >= OUTPUT_FEATURE_NUM) + return; +#else if (x >= OUTPUT_SIZE_X || y >= OUTPUT_SIZE_Y || z >= OUTPUT_SIZE_Z) return; +#endif INPUT0_TYPE input_low_val = input_low[input_low_offset]; INPUT0_TYPE input_high_val = input_high[input_high_offset]; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl index 4c41a6f332d8a0..fecc649a8b4214 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl @@ -16,6 +16,10 @@ #include "include/data_types.cl" #include "include/fetch.cl" +#ifdef SUB_GROUP_SIZE +__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) +#endif +__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* input_low, const __global INPUT2_TYPE* input_high, @@ -52,11 +56,13 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const int output_offset = OUTPUT_GET_INDEX(b, of, y, x); #endif +#if HAS_CLAMP && !PER_TENSOR_INPUT_RANGE #if INPUT1_DIMS == 4 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, y, 
x); #elif INPUT1_DIMS == 5 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x); #endif +#endif #if INPUT7_DIMS == 4 const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, y, x); @@ -64,17 +70,61 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, z, y, x); #endif +#if PER_TENSOR_INPUT_SCALE + INPUT1_TYPE input_scale_val = IN_SCALE_VAL; +#else INPUT1_TYPE input_scale_val = input_scale[scales_offset]; +#endif +#if PER_TENSOR_INPUT_SHIFT + INPUT1_TYPE input_shift_val = IN_SHIFT_VAL; +#else INPUT1_TYPE input_shift_val = input_shift[scales_offset]; +#endif + +#if PER_TENSOR_OUTPUT_SCALE + INPUT1_TYPE output_scale_val = OUT_SCALE_VAL; +#else INPUT1_TYPE output_scale_val = output_scale[scales_offset]; +#endif + +#if PER_TENSOR_OUTPUT_SHIFT + INPUT1_TYPE output_shift_val = OUT_SHIFT_VAL; +#else INPUT1_TYPE output_shift_val = output_shift[scales_offset]; +#endif + +#if PER_TENSOR_INPUT_RANGE && HAS_CLAMP + INPUT1_TYPE input_low_val = IN_LO_VAL; + INPUT1_TYPE input_high_val = IN_HI_VAL; +#elif HAS_CLAMP INPUT1_TYPE input_low_val = input_low[in_range_offset]; INPUT1_TYPE input_high_val = input_high[in_range_offset]; - INPUT1_TYPE val = min(max(TO_INPUT1_TYPE(input[input_offset]),input_low_val), input_high_val); +#endif + +#if HAS_CLAMP + INPUT1_TYPE val = min(max(TO_INPUT1_TYPE(input[input_offset]), input_low_val), input_high_val); +#else + INPUT1_TYPE val = TO_INPUT1_TYPE(input[input_offset]); +#endif +#if HAS_PRE_SHIFT + val = round(val * input_scale_val + input_shift_val); +#else + val = round(val * input_scale_val); +#endif + +#if HAS_POST_SCALE + val = val*output_scale_val; +#endif +#if HAS_POST_SHIFT + val += output_shift_val; +#endif + +#if OUTPUT_LAYOUT_B_FS_YX_FSV16 + if (of < OUTPUT_FEATURE_NUM) +#endif #if OUTPUT_IS_FP - output[output_offset] = TO_OUTPUT_TYPE_SAT(round(val * input_scale_val + input_shift_val) * output_scale_val + output_shift_val); + output[output_offset] = TO_OUTPUT_TYPE_SAT(val); #else - // TODO: the outer round should be deleted once output range is correct - output[output_offset] = TO_OUTPUT_TYPE_SAT(round(round(val * input_scale_val + input_shift_val) * output_scale_val + output_shift_val)); + output[output_offset] = TO_OUTPUT_TYPE_SAT(round(val)); #endif } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp index a869aa307b6c28..34db0348382a31 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp @@ -57,9 +57,26 @@ struct quantize_gpu : typed_primitive_gpu_impl { auto quantize_optional_params = get_default_optional_params(arg.get_program()); - quantize_params.levels = arg.get_primitive()->levels; - quantize_params.packed_binary_output = arg.get_output_layout().data_type == data_types::bin; + quantize_params.levels = arg.get_levels(); + quantize_params.packed_binary_output = arg.get_packed_binary_output(); quantize_params.scale_shift_opt = arg.get_scale_shift_opt(); + quantize_params.has_post_scale = arg.get_need_post_scale(); + quantize_params.has_post_shift = arg.get_need_post_shift(); + quantize_params.has_pre_shift = arg.get_need_pre_shift(); + quantize_params.has_clamp = arg.get_need_clamp(); + + quantize_params.per_tensor_input_range = arg.get_per_tensor_input_range(); + quantize_params.per_tensor_input_scale = arg.get_per_tensor_input_scale(); + quantize_params.per_tensor_input_shift = 
arg.get_per_tensor_input_shift(); + quantize_params.per_tensor_output_scale = arg.get_per_tensor_output_scale(); + quantize_params.per_tensor_output_shift = arg.get_per_tensor_output_shift(); + + quantize_params.in_lo = arg.get_input_lo_val(); + quantize_params.in_hi = arg.get_input_hi_val(); + quantize_params.in_scale = arg.get_input_scale_val(); + quantize_params.in_shift = arg.get_input_shift_val(); + quantize_params.out_scale = arg.get_output_scale_val(); + quantize_params.out_shift = arg.get_output_shift_val(); for (size_t i = 1; i < arg.inputs_count(); i++) { quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp index 5eb670fd2942d8..885275677388bf 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp @@ -111,9 +111,6 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant()) return; - if (quantize_node.input().get_output_layout().data_type == data_types::f16) - return; - auto &input_low = quantize_node.get_dependency(1).template as(); auto &input_high = quantize_node.get_dependency(2).template as(); auto &output_low = quantize_node.get_dependency(3).template as(); diff --git a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h index 6e05a4fa3e1e5f..d7003d1c6d6b47 100644 --- a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h @@ -34,8 +34,25 @@ struct typed_program_node : public typed_program_node_base { program_node& input(size_t index = 0) const { return get_dependency(index); } size_t inputs_count() const { return get_dependencies().size(); } + int get_levels() const { return get_primitive()->levels; } + bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::bin; } bool get_scale_shift_opt() const { return scale_shift_opt; } - bool get_need_pre_shift() { return need_pre_shift; } + bool get_need_pre_shift() const { return need_pre_shift; } + bool get_need_post_scale() const { return need_post_scale; } + bool get_need_post_shift() const { return need_post_shift; } + bool get_need_clamp() const { return need_clamp; } + bool get_per_tensor_input_scale() const { return per_tensor_input_scale; } + bool get_per_tensor_input_shift() const { return per_tensor_input_shift; } + bool get_per_tensor_input_range() const { return per_tensor_input_range; } + bool get_per_tensor_output_scale() const { return per_tensor_output_scale; } + bool get_per_tensor_output_shift() const { return per_tensor_output_shift; } + float get_input_scale_val() const { return in_scale; } + float get_input_shift_val() const { return in_shift; } + float get_input_lo_val() const { return in_lo; } + float get_input_hi_val() const { return in_hi; } + float get_output_scale_val() const { return out_scale; } + float get_output_shift_val() const { return out_shift; } + void set_scale_shift_opt() { scale_shift_opt = true; } void set_need_post_scale() { need_post_scale = true; } void set_need_post_shift() { need_post_shift = true; } diff --git 
a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 31dc29d04b3db1..c2af2edfbb2bdb 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -692,7 +692,7 @@ TEST_P(conv_fp32_quantize_u8, basic) { reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) ); - tolerance = 1e-5f; + tolerance = 1.0f; execute(p); } @@ -701,6 +701,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_quantize_u8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 3}, bc_test_params{CASE_CONV_FP32_3, 2, 3}, + + bc_test_params{CASE_CONV_FP16_2, 2, 3}, + bc_test_params{CASE_CONV_FP16_3, 2, 3}, }), ); class conv_fp32_scale_quantize_i8 : public ConvFusingTest {}; @@ -731,6 +734,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_quantize_i8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 4}, bc_test_params{CASE_CONV_FP32_3, 2, 4}, + + bc_test_params{CASE_CONV_FP16_2, 2, 4}, + bc_test_params{CASE_CONV_FP16_3, 2, 4}, }), ); class conv_fp32_scale_activation_quantize_i8 : public ConvFusingTest {}; @@ -751,7 +757,7 @@ TEST_P(conv_fp32_scale_activation_quantize_i8, basic) { reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 1.0f; execute(p); } @@ -760,36 +766,42 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 5}, bc_test_params{CASE_CONV_FP32_3, 2, 5}, + + bc_test_params{CASE_CONV_FP16_2, 2, 5}, + bc_test_params{CASE_CONV_FP16_3, 2, 5}, }), ); -class conv_fp32_scale_activation_quantize_i8_eltwise_fp32 : public ConvFusingTest {}; -TEST_P(conv_fp32_scale_activation_quantize_i8_eltwise_fp32, basic) { +class conv_fp32_scale_activation_quantize_u8_eltwise_fp32 : public ConvFusingTest {}; +TEST_P(conv_fp32_scale_activation_quantize_u8_eltwise_fp32, basic) { auto p = GetParam(); create_topologies(input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p))), data("bias", get_mem(get_bias_layout(p))), - data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_lo", get_mem(get_per_channel_layout(p), 0)), data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_lo", get_mem(get_single_element_layout(p), -127)), - data("out_hi", get_mem(get_single_element_layout(p), 127)), + data("out_lo", get_mem(get_single_element_layout(p), 0)), + data("out_hi", get_mem(get_single_element_layout(p), 255)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count()/255)), data("eltwise_data", get_mem(get_output_layout(p))), convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), scale("scale", "conv_prim", "scale_data"), activation("activation_scale", "scale", activation_func::exp), - quantize("quantize", "activation_scale", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8), - eltwise("sum", { "quantize", "eltwise_data"}, eltwise_mode::sum, data_types::f32), + quantize("quantize", "activation_scale", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8), + eltwise("sum", { "quantize", "eltwise_data"}, eltwise_mode::sum, p.default_type), reorder("reorder_bfyx", "sum", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 
1.0f; execute(p); } -INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_eltwise_fp32, +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_u8_eltwise_fp32, ::testing::ValuesIn(std::vector{ // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 6}, bc_test_params{CASE_CONV_FP32_3, 2, 6}, + + bc_test_params{CASE_CONV_FP16_2, 2, 6}, + bc_test_params{CASE_CONV_FP16_3, 2, 6}, }), ); class conv_fp32_scale_activation_quantize_i8_activation : public ConvFusingTest {}; @@ -811,7 +823,7 @@ TEST_P(conv_fp32_scale_activation_quantize_i8_activation, basic) { activation("activation_quantize", "quantize", activation_func::relu), reorder("reorder_bfyx", "activation_quantize", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 1.0f; execute(p); } @@ -819,6 +831,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_acti ::testing::ValuesIn(std::vector{ bc_test_params{CASE_CONV_FP32_2, 2, 6}, bc_test_params{CASE_CONV_FP32_3, 2, 6}, + + bc_test_params{CASE_CONV_FP16_2, 2, 6}, + bc_test_params{CASE_CONV_FP16_3, 2, 6}, }), ); @@ -2486,8 +2501,8 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_quantize_i8, mvn_test_params{ CASE_MVN_3D_U8_2, 2, 4 }, }), ); -class mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8 : public MVNFusingTest {}; -TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { +class mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8 : public MVNFusingTest {}; +TEST_P(mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8, basic) { auto p = GetParam(); create_topologies( input_layout("input", get_input_layout(p)), @@ -2495,18 +2510,18 @@ TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { data("scale_data", get_mem(get_per_channel_layout(p))), scale("scale", "mvn", "scale_data"), activation("act", "scale", activation_func::hyperbolic_tan), - data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_low", get_mem(get_per_channel_layout(p), 0)), data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_low", get_mem(get_single_element_layout(p), -127, 127)), - data("out_high", get_mem(get_single_element_layout(p), -127, 127)), - quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8), + data("out_low", get_mem(get_single_element_layout(p), 0)), + data("out_high", get_mem(get_single_element_layout(p), 255)), + quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8), data("eltw_data", get_mem(layout{ data_types::i8, p.input_format, p.input_size })), eltwise("eltw", {"quant", "eltw_data"}, eltwise_mode::sum, data_types::f32), data("in_low2", get_mem(get_per_channel_layout(p), min_random, 0)), data("in_high2", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_low2", get_mem(get_single_element_layout(p), -127, 127)), - data("out_high2", get_mem(get_single_element_layout(p), -127, 127)), - quantize("quant2", "eltw", "in_low2", "in_high2", "out_low2", "out_high2", 255, data_types::i8), + data("out_low2", get_mem(get_single_element_layout(p), -128)), + data("out_high2", get_mem(get_single_element_layout(p), 127)), + quantize("quant2", "eltw", "in_low2", "in_high2", "out_low2", "out_high2", 256, data_types::i8), reorder("reorder_bfyx", "quant2", format::bfyx, data_types::f32) ); @@ -2514,7 +2529,7 @@ TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { execute(p); } 
-INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, +INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8, ::testing::ValuesIn(std::vector{ // Full using for fp input not supported yet, it may lead to output padding and non-optimal kernel // mvn_test_params{ CASE_MVN_F32_1, 2, 7 }, @@ -3461,7 +3476,7 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) { input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p))), data("scale1_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())), - data("in1_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in1_lo", get_mem(get_per_channel_layout(p), 0)), data("in1_hi", get_mem(get_per_channel_layout(p), 1, max_random)), data("out1_lo", get_mem(get_single_element_layout(p), 0)), data("out1_hi", get_mem(get_single_element_layout(p), 255)), From 963f55a1894ede3cb9336ce14a5319d3adae26ec Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 29 May 2020 20:57:32 +0300 Subject: [PATCH 19/24] Fixed CODEOWNERS paths (#684) --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 9d19cd384ebfa5..4905affcc7dccd 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -16,7 +16,7 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins /inference-engine/ @openvinotoolkit/openvino-ie-maintainers /inference-engine/src/transformations/ @GlebKazantaev @ichuraev /inference-engine/src/legacy_api/ @openvinotoolkit/openvino-ngraph-maintainers -/inference-engine/src/ir_readers/ @openvinotoolkit/openvino-ngraph-maintainers +/inference-engine/src/readers/ @openvinotoolkit/openvino-ngraph-maintainers # IE CPU: /inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers From 3a24eb6a6233646f660f0522fe0209fc17da4d93 Mon Sep 17 00:00:00 2001 From: Vladimir Gavrilov Date: Fri, 29 May 2020 21:01:09 +0300 Subject: [PATCH 20/24] MO fails generating IR from XLNET model due to a bug in the transformation ConvertGroupedStridedSlice (#625) * Small fix in the transformation ConvertGroupedStridedSlice: VariadicSplit is now generated only when the node has at least two output nodes. * Added unit tests for the case when there is only one StridedSlice.
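For illustration, a minimal standalone sketch of the tightened consumer check; Node and should_replace_with_variadic_split are hypothetical stand-ins (the real transformation compares consumer names via node.in_node(0).name rather than object identity):

class Node:
    def __init__(self, op, name, inputs=()):
        self.op, self.name, self.inputs = op, name, list(inputs)

def should_replace_with_variadic_split(input_data, consumers):
    # Collect the StridedSlice nodes that consume input_data on input port 0.
    out_nodes = [n for n in consumers
                 if n.op == 'StridedSlice' and n.inputs and n.inputs[0] is input_data]
    # The old guard (len(out_nodes) < 1) let a lone StridedSlice through and
    # produced a single-output VariadicSplit; the fix requires at least two.
    return len(out_nodes) > 1

data = Node('Parameter', 'input_data')
single = [Node('StridedSlice', 'ss_0', [data])]
assert not should_replace_with_variadic_split(data, single)
assert should_replace_with_variadic_split(data, single + [Node('StridedSlice', 'ss_1', [data])])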
--- .../middle/ConvertGroupedStridedSlice.py | 2 +- .../middle/ConvertGroupedStridedSlice_test.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py index 3e1d83178a5034..e948f0adea8cff 100644 --- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py +++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py @@ -70,7 +70,7 @@ def find_and_replace_pattern(self, graph: Graph): # Get all StridedSlice consumers out_nodes = [node for node in input_data.out_nodes() if node.op == 'StridedSlice' and node.in_node(0).name == input_data.name] - if len(out_nodes) < 1: + if len(out_nodes) <= 1: continue valid_for_replacement = True diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py index 6eaf6d7c28a481..0dd50b9ed9ef53 100644 --- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py +++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py @@ -17,6 +17,7 @@ import unittest import numpy as np +from generator import generator, generate from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice from mo.front.common.partial_infer.utils import int64_array @@ -82,7 +83,24 @@ 'sslice_2/unsqueeze_const_data': {'kind': 'data', 'value': None, 'shape': None}, } +one_strided_slice_case_node_attributes = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None}, + 'sslice': {'type': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None, + 'shrink_axis_mask': np.array([0, 0, 0, 0])}, + 'sslice_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'op_output': {'kind': 'op', 'op': 'Result'}, +} + +one_strided_slice_case_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'sslice'), + ('sslice', 'sslice_data'), + ('sslice_data', 'op_output'), +] + +@generator class ConvertGroupedStridedSliceTests(unittest.TestCase): def test_1(self): graph = build_graph(nodes_attributes, @@ -604,6 +622,37 @@ def test_8(self): (flag, resp) = compare_graphs(graph, graph_ref, 'concat_1_data', check_op_attrs=True) self.assertTrue(flag, resp) + # Test for the case when there is only 1 StridedSlice. 
+ @generate(*[(np.array([1, 227, 227, 54]), + np.array([slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 18, 1)]), + np.array([1, 227, 227, 18])), + (np.array([57, 16, 100, 23]), + np.array([slice(3, 16, 1), slice(0, 16, 1), slice(0, 100, 1), slice(0, 23, 1)]), + np.array([13, 16, 100, 23])), + (np.array([16, 800, 1024, 17]), + np.array([slice(0, 16, 1), slice(0, 800, 1), slice(13, 817, 1), slice(0, 17, 1)]), + np.array([16, 800, 804, 17]))]) + def test_9(self, input_shape, slices, output_shape): + graph = build_graph(nodes_attrs=one_strided_slice_case_node_attributes, + edges=one_strided_slice_case_edges, + update_attributes={ + 'placeholder_data': {'shape': input_shape}, + 'sslice': {'slices': slices}, + 'sslice_data': {'shape': output_shape}, + }) + graph.graph['layout'] = 'NHWC' + graph_ref = build_graph(nodes_attrs=one_strided_slice_case_node_attributes, + edges=one_strided_slice_case_edges, + update_attributes={ + 'placeholder_data': {'shape': input_shape}, + 'sslice': {'slices': slices}, + 'sslice_data': {'shape': output_shape}, + }) + pattern = ConvertGroupedStridedSlice() + pattern.find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'op_output', check_op_attrs=True) + self.assertTrue(flag, resp) + class AddReshapeAfterStridedSliceTests(unittest.TestCase): def test_ss_1_shrink_last(self): From cbad43f3a55f923510b7611a7bea842c52f20881 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Fri, 29 May 2020 21:20:16 +0300 Subject: [PATCH 21/24] [Python API] Fix PreProcessInfo tests (#690) --- .../python/tests/test_PreProcessInfo.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py b/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py index 499fdb3c83493b..5cfc99631d07a0 100644 --- a/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py +++ b/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py @@ -8,45 +8,53 @@ test_net_xml, test_net_bin = model_path() -def get_preprocess_info(): +def test_preprocess_info(): ie_core = IECore() net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) - return net.input_info["data"].preprocess_info - - -def test_preprocess_info(): - assert isinstance(get_preprocess_info(), PreProcessInfo) + assert isinstance(net.input_info["data"].preprocess_info, PreProcessInfo) def test_color_format(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.color_format == ColorFormat.RAW def test_color_format_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info preprocess_info.color_format = ColorFormat.BGR assert preprocess_info.color_format == ColorFormat.BGR def test_resize_algorithm(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.resize_algorithm == ResizeAlgorithm.NO_RESIZE def test_resize_algorithm_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info 
preprocess_info.resize_algorithm = ResizeAlgorithm.RESIZE_BILINEAR assert preprocess_info.resize_algorithm == ResizeAlgorithm.RESIZE_BILINEAR def test_mean_variant(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.mean_variant == MeanVariant.NONE def test_mean_variant_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info preprocess_info.mean_variant = MeanVariant.MEAN_IMAGE assert preprocess_info.mean_variant == MeanVariant.MEAN_IMAGE From 3ef1a26174522d995ec1027cd410ff89e27f5a1e Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Fri, 29 May 2020 21:28:17 +0300 Subject: [PATCH 22/24] [IE TOOLS] Use input_info in python benchmark app (#660) --- tools/benchmark/benchmark.py | 2 +- tools/benchmark/main.py | 8 ++++---- tools/benchmark/utils/inputs_filling.py | 24 ++++++++++++------------ tools/benchmark/utils/utils.py | 6 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/benchmark/benchmark.py b/tools/benchmark/benchmark.py index 95965c9e653933..0764bbf9958f1b 100644 --- a/tools/benchmark/benchmark.py +++ b/tools/benchmark/benchmark.py @@ -66,7 +66,7 @@ def read_network(self, path_to_model: str): ie_network = self.ie.read_network(xml_filename, bin_filename) - input_info = ie_network.inputs + input_info = ie_network.input_info if not input_info: raise AttributeError('No inputs info is provided') diff --git a/tools/benchmark/main.py b/tools/benchmark/main.py index 8945cc3ee66d9f..df40950c61ffe2 100644 --- a/tools/benchmark/main.py +++ b/tools/benchmark/main.py @@ -175,12 +175,12 @@ def set_throughput_streams(): # --------------------- 5. 
Resizing network to match image sizes and given batch --------------------------- next_step() - shapes = {k: v.shape.copy() for k, v in ie_network.inputs.items()} + shapes = {k: v.input_data.shape.copy() for k, v in ie_network.input_info.items()} reshape = False if args.shape: - reshape |= update_shapes(shapes, args.shape, ie_network.inputs) + reshape |= update_shapes(shapes, args.shape, ie_network.input_info) if args.batch_size and args.batch_size != ie_network.batch_size: - reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.inputs) + reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.input_info) if reshape: start_time = datetime.utcnow() @@ -259,7 +259,7 @@ def set_throughput_streams(): if args.paths_to_input: for path in args.paths_to_input: paths_to_input.append(os.path.abspath(*path) if args.paths_to_input else None) - set_inputs(paths_to_input, batch_size, exe_network.inputs, infer_requests) + set_inputs(paths_to_input, batch_size, exe_network.input_info, infer_requests) if statistics: statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG, diff --git a/tools/benchmark/utils/inputs_filling.py b/tools/benchmark/utils/inputs_filling.py index b27f6ebcf569fb..ae7fa64a089010 100644 --- a/tools/benchmark/utils/inputs_filling.py +++ b/tools/benchmark/utils/inputs_filling.py @@ -47,13 +47,13 @@ def set_inputs(paths_to_input, batch_size, input_info, requests): def get_inputs(paths_to_input, batch_size, input_info, requests): input_image_sizes = {} for key in sorted(input_info.keys()): - if is_image(input_info[key]): - input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3]) + if is_image(input_info[key].input_data): + input_image_sizes[key] = (input_info[key].input_data.shape[2], input_info[key].input_data.shape[3]) logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key, - input_info[key].precision, - input_info[key].layout, + input_info[key].input_data.precision, + input_info[key].input_data.layout, " ".join(str(x) for x in - input_info[key].shape))) + input_info[key].input_data.shape))) images_count = len(input_image_sizes.keys()) binaries_count = len(input_info) - images_count @@ -102,31 +102,31 @@ def get_inputs(paths_to_input, batch_size, input_info, requests): input_data = {} keys = list(sorted(input_info.keys())) for key in keys: - if is_image(input_info[key]): + if is_image(input_info[key].input_data): # input is image if len(image_files) > 0: input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), - len(keys), input_info[key]) + len(keys), input_info[key].input_data) continue # input is binary if len(binary_files): input_data[key] = fill_blob_with_binary(binary_files, request_id, batch_size, keys.index(key), - len(keys), input_info[key]) + len(keys), input_info[key].input_data) continue # most likely input is image info - if is_image_info(input_info[key]) and len(input_image_sizes) == 1: + if is_image_info(input_info[key].input_data) and len(input_image_sizes) == 1: image_size = input_image_sizes[list(input_image_sizes.keys()).pop()] logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" + str(image_size[1])) - input_data[key] = fill_blob_with_image_info(image_size, input_info[key]) + input_data[key] = fill_blob_with_image_info(image_size, input_info[key].input_data) continue # fill with random data logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if is_image( - input_info[key]) else "some binary 
data")) - input_data[key] = fill_blob_with_random(input_info[key]) + input_info[key].input_data) else "some binary data")) + input_data[key] = fill_blob_with_random(input_info[key].input_data) requests_input_data.append(input_data) diff --git a/tools/benchmark/utils/utils.py b/tools/benchmark/utils/utils.py index 2cef9f9de5a114..16c28bb3828642 100644 --- a/tools/benchmark/utils/utils.py +++ b/tools/benchmark/utils/utils.py @@ -62,10 +62,10 @@ def next_step(additional_info='', step_id=0): def config_network_inputs(ie_network: IENetwork): - input_info = ie_network.inputs + input_info = ie_network.input_info for key in input_info.keys(): - if is_image(input_info[key]): + if is_image(input_info[key].input_data): # Set the precision of input data provided by the user # Should be called before load of the network to the plugin input_info[key].precision = 'U8' @@ -261,7 +261,7 @@ def update_shapes(shapes, shapes_string: str, inputs_info): def adjust_shapes_batch(shapes, batch_size: int, inputs_info): updated = False for name, data in inputs_info.items(): - layout = data.layout + layout = data.input_data.layout batch_index = layout.index('N') if 'N' in layout else -1 if batch_index != -1 and shapes[name][batch_index] != batch_size: shapes[name][batch_index] = batch_size From e2729b87f3a09bb5f0730bc92d1d9a27b09f4884 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Fri, 29 May 2020 22:56:58 +0300 Subject: [PATCH 23/24] [LPT] Convolution regression tests (#543) * [LPT] Base test infrastructure extending & Convolution test * [LPT] LPT test infrastructure refactoring --- ...oncat_neighboring_graph_transformation.cpp | 6 +- .../concat_transformation.cpp | 6 +- ...oncat_with_intermediate_transformation.cpp | 6 +- .../convolution_transformation.cpp | 39 ++++++ .../layer_transformation.cpp | 49 ++++++++ ...oncat_neighboring_graph_transformation.cpp | 2 +- .../concat_transformation.cpp | 2 +- ...oncat_with_intermediate_transformation.cpp | 2 +- .../convolution_transformation.cpp | 39 ++++++ .../layer_transformation.cpp | 43 +++++++ ...oncat_neighboring_graph_transformation.hpp | 4 +- .../concat_transformation.hpp | 4 +- ...oncat_with_intermediate_transformation.hpp | 4 +- .../convolution_transformation.hpp | 36 ++++++ ...oncat_neighboring_graph_transformation.cpp | 2 + .../concat_transformation.cpp | 2 + ...oncat_with_intermediate_transformation.cpp | 2 + .../convolution_transformation.cpp | 114 ++++++++++++++++++ .../layer_transformation.cpp | 74 ++++++++---- .../layer_transformation.hpp | 67 ++-------- 20 files changed, 409 insertions(+), 94 deletions(-) create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp diff --git 
a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp index 23a38b7dd3b2bb..5116acce4d7311 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatNeighboringGraphTransformation, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp index f77cedbbf52b9e..bbc88cfaaabb6a 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatTransformation, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp index 3e739a7910dae5..32cf7fbd700376 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() 
}; const std::vector transparentIntermediateValues = { true, false }; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..e97ebad56132fe --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParams() +}; + +const std::vector fqOnActivationsValues = { true, false }; + +const std::vector fqOnWeightsValues = { true, false }; + +INSTANTIATE_TEST_CASE_P(LPT, ConvolutionTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fqOnActivationsValues), + ::testing::ValuesIn(fqOnWeightsValues)), + ConvolutionTransformation::getTestCaseName); +} // namespace + + + + diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp new file mode 100644 index 00000000000000..4416509d2466f7 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "ie_util_internal.hpp" +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" +#include "low_precision_transformations/convolution.hpp" +#include "low_precision_transformations/scaleshift_to_convolution.hpp" + + +namespace LayerTestsUtils { + +InferenceEngine::details::LowPrecisionTransformations LayerTransformation::getLowPrecisionTransformations( + const InferenceEngine::details::LayerTransformation::Params& params) const { + return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params). + add(InferenceEngine::details::LayerTransformation::Params(params). + setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), "Convolution"). 
+ addCleanup( + InferenceEngine::details::LayerTransformation::Params(params).setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), + "ScaleShift"); +} + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParams() { + return InferenceEngine::details::LayerTransformation::Params( + true, + true, + true, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, + true, + true, + true); +} + +} // namespace LayerTestsUtils diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp index b7d0c6b304d659..c2800a72271d8a 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatNeighboringGraphTransformation, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp index fbfd9160129440..5f768d1458afae 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatTransformation, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp index c0a9751247476f..e5c330f16409fb 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; const std::vector transparentIntermediates = { true, false }; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..b99db86cf74503 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParams() +}; + +const std::vector fqOnActivationsValues = { true, false }; + +const std::vector fqOnWeightsValues = { true, false }; + +INSTANTIATE_TEST_CASE_P(LPT, ConvolutionTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fqOnActivationsValues), + ::testing::ValuesIn(fqOnWeightsValues)), + ConvolutionTransformation::getTestCaseName); +} // namespace + + + + diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp new file mode 100644 index 00000000000000..3a686050cbdf56 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "ie_util_internal.hpp" +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" +#include "low_precision_transformations/convolution.hpp" +#include "low_precision_transformations/scaleshift_to_convolution.hpp" + + +namespace LayerTestsUtils { + +InferenceEngine::details::LowPrecisionTransformations LayerTransformation::getLowPrecisionTransformations( + const InferenceEngine::details::LayerTransformation::Params& params) const { + return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params); +} + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParams() { + return InferenceEngine::details::LayerTransformation::Params( + true, + true, + true, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, + true, + true, + true); +} +} // namespace LayerTestsUtils diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp 
b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp index 84e1e35b472711..eee5532dc44fc0 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp @@ -11,7 +11,9 @@ namespace LayerTestsDefinitions { -class ConcatNeighboringGraphTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatNeighboringGraphTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp index 1ccb9789004ec2..cf9aa3c605cb77 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp @@ -11,7 +11,9 @@ namespace LayerTestsDefinitions { -class ConcatTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp index 30224a72f9c3d7..d1d21f33c56164 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp @@ -20,7 +20,9 @@ typedef std::tuple< // multichannel bool> ConcatWithIntermediateTransformationParams; -class ConcatWithIntermediateTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatWithIntermediateTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp new file mode 100644 index 00000000000000..13f7f6ee9361fd --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, + InferenceEngine::SizeVector, + std::string, + 
InferenceEngine::details::LayerTransformation::Params, + bool, // fqOnActivations + bool // fqOnWeights +> ConvolutionTransformationParams; + +class ConvolutionTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + +private: + void validate(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp index 46053e77c56955..8af28552ee59d2 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -132,6 +132,8 @@ void ConcatNeighboringGraphTransformation::validate() { const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock(); EXPECT_TRUE(outputLayer != nullptr); EXPECT_EQ("ScaleShift", outputLayer->type); + + checkParentPrecision(outputLayer, params.updatePrecisions); } // check quantized FQ layers map: should includes all FQ diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp index 022473e6424f68..2a594774a54402 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp @@ -89,6 +89,8 @@ void ConcatTransformation::validate() { EXPECT_TRUE(outputLayer != nullptr); EXPECT_EQ("ScaleShift", outputLayer->type); + checkParentPrecision(outputLayer, params.updatePrecisions); + IE_SUPPRESS_DEPRECATED_END } diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp index ec274f03342ab2..aca75dfee2ee9e 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -172,6 +172,8 @@ void ConcatWithIntermediateTransformation::validate() { children = CNNNetworkHelper::getChildren(*concat); EXPECT_EQ(1ul, children.size()); EXPECT_EQ("ScaleShift", children[0]->type); + + checkParentPrecision(children[0], params.updatePrecisions); } else { std::vector children = CNNNetworkHelper::getChildren(*intermediate); EXPECT_EQ(2ul, children.size()); diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..c0f87d40c1ac80 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,114 @@ +// Copyright (C) 2019 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/convolution_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/pass/convert_prc.hpp" +#include "ngraph_functions/builders.hpp" + + +namespace LayerTestsDefinitions { + +std::string ConvolutionTransformation::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShapes, targetDevice, params, fqOnActivations, fqOnWeights) = obj.param; + + std::ostringstream result; + result << netPrecision.name() << "_" << targetDevice << "_" << toString(params) << + (fqOnActivations ? "" : "_noFqOnActivations") << + (fqOnWeights ? "" : "_noFqOnWeights"); + return result.str(); +} + +void ConvolutionTransformation::SetUp() { + threshold = 0.1f; + + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShape, targetDevice, params, fqOnActivations, fqOnWeights) = this->GetParam(); + auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + const float k = 50.f; + + const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + const auto fakeQuantizeOnActivations = fqOnActivations ? + ngraph::builder::makeFakeQuantize( + input, precision, 256ul, { 1ul }, + { 0.f }, { 255.f / k }, { 0.f }, { 255.f / k }) : + nullptr; + + auto weights = ngraph::opset1::Constant::create( + precision, + ngraph::Shape{ inputShape[1], inputShape[1], 1, 1 }, + std::vector(inputShape[1] * inputShape[1], 1)); + + const auto convolution = std::make_shared( + fakeQuantizeOnActivations == nullptr ? input : fakeQuantizeOnActivations, + fqOnWeights ? + ngraph::builder::makeFakeQuantize( + weights, precision, 255ul, { 1ul }, + { -128.f / k }, { 127.f / k }, { -128.f / k }, { 127.f / k }) : + weights->output(0), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + + ngraph::ResultVector results {std::make_shared(convolution)}; + function = std::make_shared(results, ngraph::ParameterVector { input }, "ConvolutionTransformation"); + + validate(); +} + +void ConvolutionTransformation::validate() { + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShape, targetDevice, params, fqOnActivations, fqOnWeights) = this->GetParam(); + + const InferenceEngine::CNNNetwork network = transform(params); + + IE_SUPPRESS_DEPRECATED_START + + InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); + EXPECT_EQ(1, outputs.size()); + + std::map::iterator it = outputs.begin(); + const InferenceEngine::CNNLayerPtr outputLayer = it->second->getCreatorLayer().lock(); + EXPECT_TRUE(outputLayer != nullptr); + EXPECT_EQ(fqOnActivations & fqOnWeights ? 
"ScaleShift" : "Convolution", outputLayer->type); + + IE_SUPPRESS_DEPRECATED_END +} + +TEST_P(ConvolutionTransformation, CompareWithRefImpl) { + Run(); + + if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) { + PluginCache::get().reset(); + } +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp index 0d0ece7098c9bf..db998a6b626826 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -23,7 +24,8 @@ namespace LayerTestsUtils { -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamU8I8() { + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsU8I8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -37,7 +39,7 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::I8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamU8U8() { +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsU8U8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -51,7 +53,7 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::U8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamI8I8() { +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsI8I8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -65,28 +67,52 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::I8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamCpu() { - return InferenceEngine::details::LayerTransformation::Params( - true, - true, - true, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, - true, - true, - true); +InferenceEngine::details::LowPrecisionTransformer LayerTransformation::getLowPrecisionTransformer( + const InferenceEngine::details::LayerTransformation::Params& params) const { + InferenceEngine::details::LowPrecisionTransformer transformer(getLowPrecisionTransformations(params)); + return transformer; } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamGpu() { - // not completed - return InferenceEngine::details::LayerTransformation::Params( - true, - true, - true, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, - true, - true, - true); +InferenceEngine::CNNNetwork LayerTransformation::transform(InferenceEngine::details::LayerTransformation::Params& params) { + InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = 
cloneNet(InferenceEngine::CNNNetwork(function)); + + auto transformer = getLowPrecisionTransformer(params); + transformer.transform(*cnnNetworkImp); + + return InferenceEngine::CNNNetwork(cnnNetworkImp); +} + +InferenceEngine::CNNNetwork LayerTransformation::transform(const InferenceEngine::details::LowPrecisionTransformations& transformations) { + InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); + + InferenceEngine::details::LowPrecisionTransformer transformer(transformations); + transformer.transform(*cnnNetworkImp); + + return InferenceEngine::CNNNetwork(cnnNetworkImp); } + +void LayerTransformation::checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision) { + EXPECT_EQ(1ul, layer->insData.size()) << "insData count is not as expected: " << layer->insData.size(); + const InferenceEngine::DataPtr insData = layer->insData[0].lock(); + EXPECT_TRUE(insData != nullptr) << "insData is null"; + const InferenceEngine::Precision precision = insData->getTensorDesc().getPrecision(); + + const std::unordered_set expectedPrecisions = lowPrecision ? + std::unordered_set({ InferenceEngine::Precision::U8, InferenceEngine::Precision::I8 }) : + std::unordered_set({ InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP32 }); + EXPECT_TRUE((expectedPrecisions.find(precision) != expectedPrecisions.end())) << + "actual precision is " << precision; +} + +std::string LayerTransformation::toString(const InferenceEngine::details::LayerTransformation::Params& params) { + std::ostringstream result; + result << + (params.supportAsymmetricQuantization ? "asymmetric" : "symmetric") << "_" << + params.precisionsOnActivations << "_" << + params.precisionsOnWeights << "_" << + params.quantizedTensorAlignmentOnActivations; + + return result.str(); +} + } // namespace LayerTestsUtils diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp index 81a3a82fb9ba71..13fdb67581dafc 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp @@ -21,70 +21,27 @@ typedef std::tuple< class LayerTransformationParamsFactory { public: - static InferenceEngine::details::LayerTransformation::Params createParamU8I8(); - static InferenceEngine::details::LayerTransformation::Params createParamU8U8(); - static InferenceEngine::details::LayerTransformation::Params createParamI8I8(); - static InferenceEngine::details::LayerTransformation::Params createParamCpu(); - static InferenceEngine::details::LayerTransformation::Params createParamGpu(); + static InferenceEngine::details::LayerTransformation::Params createParamsU8I8(); + static InferenceEngine::details::LayerTransformation::Params createParamsU8U8(); + static InferenceEngine::details::LayerTransformation::Params createParamsI8I8(); + static InferenceEngine::details::LayerTransformation::Params createParams(); }; -template -class LayerTransformation : public testing::WithParamInterface, public LayerTestsUtils::LayerTestsCommon { -public: +class LayerTransformation : public LayerTestsUtils::LayerTestsCommon { +protected: InferenceEngine::details::LowPrecisionTransformations getLowPrecisionTransformations( - const 
InferenceEngine::details::LayerTransformation::Params& params) const { - if (targetDevice == "CPU") { - return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params). - add(InferenceEngine::details::LayerTransformation::Params(params). - setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), "Convolution"). - addCleanup( - InferenceEngine::details::LayerTransformation::Params(params).setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), - "ScaleShift"); - } else if (targetDevice == "GPU") { - return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params); - } else { - THROW_IE_EXCEPTION << "unknown target device " << targetDevice; - } - } + const InferenceEngine::details::LayerTransformation::Params& params) const; InferenceEngine::details::LowPrecisionTransformer getLowPrecisionTransformer( - const InferenceEngine::details::LayerTransformation::Params& params) const { - InferenceEngine::details::LowPrecisionTransformer transformer(getLowPrecisionTransformations(params)); - return transformer; - } - - InferenceEngine::CNNNetwork transform() { - return transform(LayerTransformationParamsFactory::createParamCpu()); - } - - InferenceEngine::CNNNetwork transform(InferenceEngine::details::LayerTransformation::Params& params) { - InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); - - auto transformer = getLowPrecisionTransformer(params); - transformer.transform(*cnnNetworkImp); - - return InferenceEngine::CNNNetwork(cnnNetworkImp); - } - - InferenceEngine::CNNNetwork transform(const InferenceEngine::details::LowPrecisionTransformations& transformations) { - InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); + const InferenceEngine::details::LayerTransformation::Params& params) const; - InferenceEngine::details::LowPrecisionTransformer transformer(transformations); - transformer.transform(*cnnNetworkImp); + InferenceEngine::CNNNetwork transform(InferenceEngine::details::LayerTransformation::Params& params); - return InferenceEngine::CNNNetwork(cnnNetworkImp); - } + InferenceEngine::CNNNetwork transform(const InferenceEngine::details::LowPrecisionTransformations& transformations); - static std::string toString(const InferenceEngine::details::LayerTransformation::Params& params) { - std::ostringstream result; - result << - (params.supportAsymmetricQuantization ? 
"asymmetric" : "symmetric") << "_" << - params.precisionsOnActivations << "_" << - params.precisionsOnWeights << "_" << - params.quantizedTensorAlignmentOnActivations; + static void checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision); - return result.str(); - } + static std::string toString(const InferenceEngine::details::LayerTransformation::Params& params); }; } // namespace LayerTestsUtils From 9af51a165faa58d6f3f7c2eb6891f8cabf19bdb4 Mon Sep 17 00:00:00 2001 From: Denis Orlov Date: Sat, 30 May 2020 00:43:42 +0300 Subject: [PATCH 24/24] [GNA] Workaround support for callbacks (#591) --- inference-engine/src/gna_plugin/gna_infer_request.hpp | 7 +++++++ .../gna/shared_tests_instances/skip_tests_config.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/inference-engine/src/gna_plugin/gna_infer_request.hpp b/inference-engine/src/gna_plugin/gna_infer_request.hpp index 17688f4688a5ce..9641c17011689c 100644 --- a/inference-engine/src/gna_plugin/gna_infer_request.hpp +++ b/inference-engine/src/gna_plugin/gna_infer_request.hpp @@ -69,6 +69,13 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal { // execute input pre-processing. execDataPreprocessing(_inputs); inferRequestIdx = plg->QueueInference(_inputs, _outputs); + // workaround to unblock callback-based flows + if (_callback) { + auto infer_request = _publicInterface.lock(); + IE_ASSERT(infer_request != nullptr); + auto res = Wait(0); + _callback(infer_request, res); + } } InferenceEngine::StatusCode Wait(int64_t millis_timeout) override { diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index c17bf7445ceb9d..5348638b62d610 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -10,6 +10,6 @@ std::vector disabledTestPatterns() { return { // TODO: FIX BUG 31661 - ".*Behavior.*Callback.*" + ".*Behavior.*CallbackThrowException.*" }; }